xref: /titanic_41/usr/src/uts/i86pc/os/fastboot.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * This file contains the functions for performing Fast Reboot -- a
28  * reboot which bypasses the firmware and bootloader, considerably
29  * reducing downtime.
30  *
31  * fastboot_load_kernel(): This function is invoked by mdpreboot() in the
32  * reboot path.  It loads the new kernel and boot archive into memory, builds
33  * the data structure containing sufficient information about the new
34  * kernel and boot archive to be passed to the fast reboot switcher
35  * (see fb_swtch_src.s for details).  When invoked the switcher relocates
36  * the new kernel and boot archive to physically contiguous low memory,
37  * similar to where the boot loader would have loaded them, and jumps to
38  * the new kernel.
39  *
40  * If fastreboot_onpanic is enabled, fastboot_load_kernel() is called
41  * by fastboot_post_startup() to load the backup kernel in case of
42  * panic.
43  *
44  * The physical addresses of the memory allocated for the new kernel, boot
45  * archive and their page tables must be above where the boot archive ends
46  * after it has been relocated by the switcher, otherwise the new files
47  * and their page tables could be overridden during relocation.
48  *
49  * fast_reboot(): This function is invoked by mdboot() once it's determined
50  * that the system is capable of fast reboot.  It jumps to the fast reboot
51  * switcher with the data structure built by fastboot_load_kernel() as the
52  * argument.
53  */
54 
55 #include <sys/types.h>
56 #include <sys/param.h>
57 #include <sys/segments.h>
58 #include <sys/sysmacros.h>
59 #include <sys/vm.h>
60 
61 #include <sys/proc.h>
62 #include <sys/buf.h>
63 #include <sys/kmem.h>
64 
65 #include <sys/reboot.h>
66 #include <sys/uadmin.h>
67 
68 #include <sys/cred.h>
69 #include <sys/vnode.h>
70 #include <sys/file.h>
71 
72 #include <sys/cmn_err.h>
73 #include <sys/dumphdr.h>
74 #include <sys/bootconf.h>
75 #include <sys/ddidmareq.h>
76 #include <sys/varargs.h>
77 #include <sys/promif.h>
78 #include <sys/modctl.h>
79 
80 #include <vm/hat.h>
81 #include <vm/as.h>
82 #include <vm/page.h>
83 #include <vm/seg.h>
84 #include <vm/hat_i86.h>
85 #include <sys/vm_machparam.h>
86 #include <sys/archsystm.h>
87 #include <sys/machsystm.h>
88 #include <sys/mman.h>
89 #include <sys/x86_archext.h>
90 #include <sys/smp_impldefs.h>
91 #include <sys/spl.h>
92 
93 #include <sys/fastboot_impl.h>
94 #include <sys/machelf.h>
95 #include <sys/kobj.h>
96 #include <sys/multiboot.h>
97 #include <sys/kobj_lex.h>
98 
99 /*
100  * Macro to determine how many pages are needed for PTEs to map a particular
101  * file.  Allocate one extra page table entry for terminating the list.
102  */
103 #define	FASTBOOT_PTE_LIST_SIZE(fsize)	\
104 	P2ROUNDUP((((fsize) >> PAGESHIFT) + 1) * sizeof (x86pte_t), PAGESIZE)
105 
106 /*
107  * Data structure containing necessary information for the fast reboot
108  * switcher to jump to the new kernel.
109  */
110 fastboot_info_t newkernel = { 0 };
111 char		fastboot_args[OBP_MAXPATHLEN];
112 
113 static char fastboot_filename[2][OBP_MAXPATHLEN] = { { 0 }, { 0 }};
114 static x86pte_t ptp_bits = PT_VALID | PT_REF | PT_USER | PT_WRITABLE;
115 static x86pte_t pte_bits =
116     PT_VALID | PT_REF | PT_MOD | PT_NOCONSIST | PT_WRITABLE;
117 static uint_t fastboot_shift_amt_pae[] = {12, 21, 30, 39};
118 
119 /* Index into Fast Reboot not supported message array */
120 static uint32_t fastreboot_nosup_id = FBNS_DEFAULT;
121 
122 /* Fast Reboot not supported message array */
123 static const char * const fastreboot_nosup_desc[FBNS_END] = {
124 #define	fastboot_nosup_msg(id, str)	str,
125 #include <sys/fastboot_msg.h>
126 };
127 
128 int fastboot_debug = 0;
129 int fastboot_contig = 0;
130 
131 /*
132  * Fake starting va for new kernel and boot archive.
133  */
134 static uintptr_t fake_va = FASTBOOT_FAKE_VA;
135 
136 /*
137  * Reserve memory below PA 1G in preparation of fast reboot.
138  *
139  * This variable is only checked when fastreboot_capable is set, but
140  * fastreboot_onpanic is not set.  The amount of memory reserved
141  * is negligible, but just in case we are really short of low memory,
142  * this variable will give us a backdoor to not consume memory at all.
143  */
144 int reserve_mem_enabled = 1;
145 
146 /*
147  * Mutex to protect fastreboot_onpanic.
148  */
149 kmutex_t fastreboot_config_mutex;
150 
151 /*
152  * Amount of memory below PA 1G to reserve for constructing the multiboot
153  * data structure and the page tables as we tend to run out of those
154  * when more drivers are loaded.
155  */
156 static size_t fastboot_mbi_size = 0x2000;	/* 8K */
157 static size_t fastboot_pagetable_size = 0x5000;	/* 20K */
158 
159 /*
160  * Minimum system uptime in clock_t before Fast Reboot should be used
161  * on panic.  Will be initialized in fastboot_post_startup().
162  */
163 clock_t fastreboot_onpanic_uptime = LONG_MAX;
164 
165 /*
166  * lbolt value when the system booted.  This value will be used if the system
167  * panics to calculate how long the system has been up.  If the uptime is less
168  * than fastreboot_onpanic_uptime, a reboot through BIOS will be performed to
169  * avoid a potential panic/reboot loop.
170  */
171 clock_t lbolt_at_boot = LONG_MAX;
172 
173 /*
174  * Use below 1G for page tables as
175  *	1. we are only doing 1:1 mapping of the bottom 1G of physical memory.
176  *	2. we are using 2G as the fake virtual address for the new kernel and
177  *	boot archive.
178  */
179 static ddi_dma_attr_t fastboot_below_1G_dma_attr = {
180 	DMA_ATTR_V0,
181 	0x0000000008000000ULL,	/* dma_attr_addr_lo: 128MB */
182 	0x000000003FFFFFFFULL,	/* dma_attr_addr_hi: 1G */
183 	0x00000000FFFFFFFFULL,	/* dma_attr_count_max */
184 	0x0000000000001000ULL,	/* dma_attr_align: 4KB */
185 	1,			/* dma_attr_burstsize */
186 	1,			/* dma_attr_minxfer */
187 	0x00000000FFFFFFFFULL,	/* dma_attr_maxxfer */
188 	0x00000000FFFFFFFFULL,	/* dma_attr_seg */
189 	1,			/* dma_attr_sgllen */
190 	0x1000ULL,		/* dma_attr_granular */
191 	0,			/* dma_attr_flags */
192 };
193 
194 static ddi_dma_attr_t fastboot_dma_attr = {
195 	DMA_ATTR_V0,
196 	0x0000000008000000ULL,	/* dma_attr_addr_lo: 128MB */
197 #ifdef	__amd64
198 	0xFFFFFFFFFFFFFFFFULL,	/* dma_attr_addr_hi: 2^64B */
199 #else
200 	0x0000000FFFFFFFFFULL,	/* dma_attr_addr_hi: 64GB */
201 #endif	/* __amd64 */
202 	0x00000000FFFFFFFFULL,	/* dma_attr_count_max */
203 	0x0000000000001000ULL,	/* dma_attr_align: 4KB */
204 	1,			/* dma_attr_burstsize */
205 	1,			/* dma_attr_minxfer */
206 	0x00000000FFFFFFFFULL,	/* dma_attr_maxxfer */
207 	0x00000000FFFFFFFFULL,	/* dma_attr_seg */
208 	1,			/* dma_attr_sgllen */
209 	0x1000ULL,		/* dma_attr_granular */
210 	0,			/* dma_attr_flags */
211 };
212 
213 /*
214  * Various information saved from the previous boot to reconstruct
215  * multiboot_info.
216  */
217 extern multiboot_info_t saved_mbi;
218 extern mb_memory_map_t saved_mmap[FASTBOOT_SAVED_MMAP_COUNT];
219 extern uint8_t saved_drives[FASTBOOT_SAVED_DRIVES_SIZE];
220 extern char saved_cmdline[FASTBOOT_SAVED_CMDLINE_LEN];
221 extern int saved_cmdline_len;
222 extern size_t saved_file_size[];
223 
224 extern void* contig_alloc(size_t size, ddi_dma_attr_t *attr,
225     uintptr_t align, int cansleep);
226 extern void contig_free(void *addr, size_t size);
227 
228 
229 /* PRINTLIKE */
230 extern void vprintf(const char *, va_list);
231 
232 
233 /*
234  * Need to be able to get boot_archives from other places
235  */
236 #define	BOOTARCHIVE64	"/platform/i86pc/amd64/boot_archive"
237 #define	BOOTARCHIVE32	"/platform/i86pc/boot_archive"
238 #define	BOOTARCHIVE32_FAILSAFE	"/boot/x86.miniroot-safe"
239 #define	BOOTARCHIVE64_FAILSAFE	"/boot/amd64/x86.miniroot-safe"
240 #define	FAILSAFE_BOOTFILE32	"/boot/platform/i86pc/kernel/unix"
241 #define	FAILSAFE_BOOTFILE64	"/boot/platform/i86pc/kernel/amd64/unix"
242 
243 static uint_t fastboot_vatoindex(fastboot_info_t *, uintptr_t, int);
244 static void fastboot_map_with_size(fastboot_info_t *, uintptr_t,
245     paddr_t, size_t, int);
246 static void fastboot_build_pagetables(fastboot_info_t *);
247 static int fastboot_build_mbi(char *, fastboot_info_t *);
248 static void fastboot_free_file(fastboot_file_t *);
249 
250 static const char fastboot_enomem_msg[] = "!Fastboot: Couldn't allocate 0x%"
251 	PRIx64" bytes below %s to do fast reboot";
252 
253 static void
dprintf(char * fmt,...)254 dprintf(char *fmt, ...)
255 {
256 	va_list adx;
257 
258 	if (!fastboot_debug)
259 		return;
260 
261 	va_start(adx, fmt);
262 	vprintf(fmt, adx);
263 	va_end(adx);
264 }
265 
266 
267 /*
268  * Return the index corresponding to a virt address at a given page table level.
269  */
270 static uint_t
fastboot_vatoindex(fastboot_info_t * nk,uintptr_t va,int level)271 fastboot_vatoindex(fastboot_info_t *nk, uintptr_t va, int level)
272 {
273 	return ((va >> nk->fi_shift_amt[level]) & (nk->fi_ptes_per_table - 1));
274 }
275 
276 
277 /*
278  * Add mapping from vstart to pstart for the specified size.
279  * vstart, pstart and size should all have been aligned at 2M boundaries.
280  */
281 static void
fastboot_map_with_size(fastboot_info_t * nk,uintptr_t vstart,paddr_t pstart,size_t size,int level)282 fastboot_map_with_size(fastboot_info_t *nk, uintptr_t vstart, paddr_t pstart,
283     size_t size, int level)
284 {
285 	x86pte_t	pteval, *table;
286 	uintptr_t	vaddr;
287 	paddr_t		paddr;
288 	int		index, l;
289 
290 	table = (x86pte_t *)(nk->fi_pagetable_va);
291 
292 	for (l = nk->fi_top_level; l >= level; l--) {
293 
294 		index = fastboot_vatoindex(nk, vstart, l);
295 
296 		if (l == level) {
297 			/*
298 			 * Last level.  Program the page table entries.
299 			 */
300 			for (vaddr = vstart, paddr = pstart;
301 			    vaddr < vstart + size;
302 			    vaddr += (1ULL << nk->fi_shift_amt[l]),
303 			    paddr += (1ULL << nk->fi_shift_amt[l])) {
304 
305 				uint_t index = fastboot_vatoindex(nk, vaddr, l);
306 
307 				if (l > 0)
308 					pteval = paddr | pte_bits | PT_PAGESIZE;
309 				else
310 					pteval = paddr | pte_bits;
311 
312 				table[index] = pteval;
313 			}
314 		} else if (table[index] & PT_VALID) {
315 
316 			table = (x86pte_t *)
317 			    ((uintptr_t)(((paddr_t)table[index] & MMU_PAGEMASK)
318 			    - nk->fi_pagetable_pa) + nk->fi_pagetable_va);
319 		} else {
320 			/*
321 			 * Intermediate levels.
322 			 * Program with either valid bit or PTP bits.
323 			 */
324 			if (l == nk->fi_top_level) {
325 #ifdef	__amd64
326 				ASSERT(nk->fi_top_level == 3);
327 				table[index] = nk->fi_next_table_pa | ptp_bits;
328 #else
329 				table[index] = nk->fi_next_table_pa | PT_VALID;
330 #endif	/* __amd64 */
331 			} else {
332 				table[index] = nk->fi_next_table_pa | ptp_bits;
333 			}
334 			table = (x86pte_t *)(nk->fi_next_table_va);
335 			nk->fi_next_table_va += MMU_PAGESIZE;
336 			nk->fi_next_table_pa += MMU_PAGESIZE;
337 		}
338 	}
339 }
340 
341 /*
342  * Build page tables for the lower 1G of physical memory using 2M
343  * pages, and prepare page tables for mapping new kernel and boot
344  * archive pages using 4K pages.
345  */
346 static void
fastboot_build_pagetables(fastboot_info_t * nk)347 fastboot_build_pagetables(fastboot_info_t *nk)
348 {
349 	/*
350 	 * Map lower 1G physical memory.  Use large pages.
351 	 */
352 	fastboot_map_with_size(nk, 0, 0, ONE_GIG, 1);
353 
354 	/*
355 	 * Map one 4K page to get the middle page tables set up.
356 	 */
357 	fake_va = P2ALIGN_TYPED(fake_va, nk->fi_lpagesize, uintptr_t);
358 	fastboot_map_with_size(nk, fake_va,
359 	    nk->fi_files[0].fb_pte_list_va[0] & MMU_PAGEMASK, PAGESIZE, 0);
360 }
361 
362 
363 /*
364  * Sanity check.  Look for dboot offset.
365  */
366 static int
fastboot_elf64_find_dboot_load_offset(void * img,off_t imgsz,uint32_t * offp)367 fastboot_elf64_find_dboot_load_offset(void *img, off_t imgsz, uint32_t *offp)
368 {
369 	Elf64_Ehdr	*ehdr = (Elf64_Ehdr *)img;
370 	Elf64_Phdr	*phdr;
371 	uint8_t		*phdrbase;
372 	int		i;
373 
374 	if ((ehdr->e_phoff + ehdr->e_phnum * ehdr->e_phentsize) >= imgsz)
375 		return (-1);
376 
377 	phdrbase = (uint8_t *)img + ehdr->e_phoff;
378 
379 	for (i = 0; i < ehdr->e_phnum; i++) {
380 		phdr = (Elf64_Phdr *)(phdrbase + ehdr->e_phentsize * i);
381 
382 		if (phdr->p_type == PT_LOAD) {
383 			if (phdr->p_vaddr == phdr->p_paddr &&
384 			    phdr->p_vaddr == DBOOT_ENTRY_ADDRESS) {
385 				ASSERT(phdr->p_offset <= UINT32_MAX);
386 				*offp = (uint32_t)phdr->p_offset;
387 				return (0);
388 			}
389 		}
390 	}
391 
392 	return (-1);
393 }
394 
395 
396 /*
397  * Initialize text and data section information for 32-bit kernel.
398  * sectcntp - is both input/output parameter.
399  * On entry, *sectcntp contains maximum allowable number of sections;
400  * on return, it contains the actual number of sections filled.
401  */
402 static int
fastboot_elf32_find_loadables(void * img,off_t imgsz,fastboot_section_t * sectp,int * sectcntp,uint32_t * offp)403 fastboot_elf32_find_loadables(void *img, off_t imgsz, fastboot_section_t *sectp,
404     int *sectcntp, uint32_t *offp)
405 {
406 	Elf32_Ehdr	*ehdr = (Elf32_Ehdr *)img;
407 	Elf32_Phdr	*phdr;
408 	uint8_t		*phdrbase;
409 	int		i;
410 	int		used_sections = 0;
411 	const int	max_sectcnt = *sectcntp;
412 
413 	if ((ehdr->e_phoff + ehdr->e_phnum * ehdr->e_phentsize) >= imgsz)
414 		return (-1);
415 
416 	phdrbase = (uint8_t *)img + ehdr->e_phoff;
417 
418 	for (i = 0; i < ehdr->e_phnum; i++) {
419 		phdr = (Elf32_Phdr *)(phdrbase + ehdr->e_phentsize * i);
420 
421 		if (phdr->p_type == PT_INTERP)
422 			return (-1);
423 
424 		if (phdr->p_type != PT_LOAD)
425 			continue;
426 
427 		if (phdr->p_vaddr == phdr->p_paddr &&
428 		    phdr->p_paddr == DBOOT_ENTRY_ADDRESS) {
429 			*offp = (uint32_t)phdr->p_offset;
430 		} else {
431 			if (max_sectcnt <= used_sections)
432 				return (-1);
433 
434 			sectp[used_sections].fb_sec_offset = phdr->p_offset;
435 			sectp[used_sections].fb_sec_paddr = phdr->p_paddr;
436 			sectp[used_sections].fb_sec_size = phdr->p_filesz;
437 			sectp[used_sections].fb_sec_bss_size =
438 			    (phdr->p_filesz < phdr->p_memsz) ?
439 			    (phdr->p_memsz - phdr->p_filesz) : 0;
440 
441 			/* Extra sanity check for the input object file */
442 			if (sectp[used_sections].fb_sec_paddr +
443 			    sectp[used_sections].fb_sec_size +
444 			    sectp[used_sections].fb_sec_bss_size >=
445 			    DBOOT_ENTRY_ADDRESS)
446 				return (-1);
447 
448 			used_sections++;
449 		}
450 	}
451 
452 	*sectcntp = used_sections;
453 	return (0);
454 }
455 
456 /*
457  * Create multiboot info structure (mbi) base on the saved mbi.
458  * Recalculate values of the pointer type fields in the data
459  * structure based on the new starting physical address of the
460  * data structure.
461  */
462 static int
fastboot_build_mbi(char * mdep,fastboot_info_t * nk)463 fastboot_build_mbi(char *mdep, fastboot_info_t *nk)
464 {
465 	mb_module_t	*mbp;
466 	multiboot_info_t	*mbi;	/* pointer to multiboot structure */
467 	uintptr_t	start_addr_va;	/* starting VA of mbi */
468 	uintptr_t	start_addr_pa;	/* starting PA of mbi */
469 	size_t		offs = 0;	/* offset from the starting address */
470 	size_t		arglen;		/* length of the command line arg */
471 	size_t		size;	/* size of the memory reserved for mbi */
472 	size_t		mdnsz;	/* length of the boot archive name */
473 
474 	/*
475 	 * If mdep is not NULL or empty, use the length of mdep + 1
476 	 * (for NULL terminating) as the length of the new command
477 	 * line; else use the saved command line length as the
478 	 * length for the new command line.
479 	 */
480 	if (mdep != NULL && strlen(mdep) != 0) {
481 		arglen = strlen(mdep) + 1;
482 	} else {
483 		arglen = saved_cmdline_len;
484 	}
485 
486 	/*
487 	 * Allocate memory for the new multiboot info structure (mbi).
488 	 * If we have reserved memory for mbi but it's not enough,
489 	 * free it and reallocate.
490 	 */
491 	size = PAGESIZE + P2ROUNDUP(arglen, PAGESIZE);
492 	if (nk->fi_mbi_size && nk->fi_mbi_size < size) {
493 		contig_free((void *)nk->fi_new_mbi_va, nk->fi_mbi_size);
494 		nk->fi_mbi_size = 0;
495 	}
496 
497 	if (nk->fi_mbi_size == 0) {
498 		if ((nk->fi_new_mbi_va =
499 		    (uintptr_t)contig_alloc(size, &fastboot_below_1G_dma_attr,
500 		    PAGESIZE, 0)) == NULL) {
501 			cmn_err(CE_NOTE, fastboot_enomem_msg,
502 			    (uint64_t)size, "1G");
503 			return (-1);
504 		}
505 		/*
506 		 * fi_mbi_size must be set after the allocation succeeds
507 		 * as it's used to determine how much memory to free.
508 		 */
509 		nk->fi_mbi_size = size;
510 	}
511 
512 	/*
513 	 * Initalize memory
514 	 */
515 	bzero((void *)nk->fi_new_mbi_va, nk->fi_mbi_size);
516 
517 	/*
518 	 * Get PA for the new mbi
519 	 */
520 	start_addr_va = nk->fi_new_mbi_va;
521 	start_addr_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
522 	    (caddr_t)start_addr_va));
523 	nk->fi_new_mbi_pa = (paddr_t)start_addr_pa;
524 
525 	/*
526 	 * Populate the rest of the fields in the data structure
527 	 */
528 
529 	/*
530 	 * Copy from the saved mbi to preserve all non-pointer type fields.
531 	 */
532 	mbi = (multiboot_info_t *)start_addr_va;
533 	bcopy(&saved_mbi, mbi, sizeof (*mbi));
534 
535 	/*
536 	 * Recalculate mods_addr.  Set mod_start and mod_end based on
537 	 * the physical address of the new boot archive.  Set mod_name
538 	 * to the name of the new boto archive.
539 	 */
540 	offs += sizeof (multiboot_info_t);
541 	mbi->mods_addr = start_addr_pa + offs;
542 	mbp = (mb_module_t *)(start_addr_va + offs);
543 	mbp->mod_start = nk->fi_files[FASTBOOT_BOOTARCHIVE].fb_dest_pa;
544 	mbp->mod_end = nk->fi_files[FASTBOOT_BOOTARCHIVE].fb_next_pa;
545 
546 	offs += sizeof (mb_module_t);
547 	mdnsz = strlen(fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE]) + 1;
548 	bcopy(fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE],
549 	    (void *)(start_addr_va + offs), mdnsz);
550 	mbp->mod_name = start_addr_pa + offs;
551 	mbp->reserved = 0;
552 
553 	/*
554 	 * Make sure the offset is 16-byte aligned to avoid unaligned access.
555 	 */
556 	offs += mdnsz;
557 	offs = P2ROUNDUP_TYPED(offs, 16, size_t);
558 
559 	/*
560 	 * Recalculate mmap_addr
561 	 */
562 	mbi->mmap_addr = start_addr_pa + offs;
563 	bcopy((void *)(uintptr_t)saved_mmap, (void *)(start_addr_va + offs),
564 	    saved_mbi.mmap_length);
565 	offs += saved_mbi.mmap_length;
566 
567 	/*
568 	 * Recalculate drives_addr
569 	 */
570 	mbi->drives_addr = start_addr_pa + offs;
571 	bcopy((void *)(uintptr_t)saved_drives, (void *)(start_addr_va + offs),
572 	    saved_mbi.drives_length);
573 	offs += saved_mbi.drives_length;
574 
575 	/*
576 	 * Recalculate the address of cmdline.  Set cmdline to contain the
577 	 * new boot argument.
578 	 */
579 	mbi->cmdline = start_addr_pa + offs;
580 
581 	if (mdep != NULL && strlen(mdep) != 0) {
582 		bcopy(mdep, (void *)(start_addr_va + offs), arglen);
583 	} else {
584 		bcopy((void *)saved_cmdline, (void *)(start_addr_va + offs),
585 		    arglen);
586 	}
587 
588 	/* clear fields and flags that are not copied */
589 	bzero(&mbi->config_table,
590 	    sizeof (*mbi) - offsetof(multiboot_info_t, config_table));
591 	mbi->flags &= ~(MB_INFO_CONFIG_TABLE | MB_INFO_BOOT_LOADER_NAME |
592 	    MB_INFO_APM_TABLE | MB_INFO_VIDEO_INFO);
593 
594 	return (0);
595 }
596 
597 /*
598  * Initialize HAT related fields
599  */
600 static void
fastboot_init_fields(fastboot_info_t * nk)601 fastboot_init_fields(fastboot_info_t *nk)
602 {
603 	if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
604 		nk->fi_has_pae = 1;
605 		nk->fi_shift_amt = fastboot_shift_amt_pae;
606 		nk->fi_ptes_per_table = 512;
607 		nk->fi_lpagesize = (2 << 20);	/* 2M */
608 #ifdef	__amd64
609 		nk->fi_top_level = 3;
610 #else
611 		nk->fi_top_level = 2;
612 #endif	/* __amd64 */
613 	}
614 }
615 
616 /*
617  * Process boot argument
618  */
619 static void
fastboot_parse_mdep(char * mdep,char * kern_bootpath,int * bootpath_len,char * bootargs)620 fastboot_parse_mdep(char *mdep, char *kern_bootpath, int *bootpath_len,
621     char *bootargs)
622 {
623 	int	i;
624 
625 	/*
626 	 * If mdep is not NULL, it comes in the format of
627 	 *	mountpoint unix args
628 	 */
629 	if (mdep != NULL && strlen(mdep) != 0) {
630 		if (mdep[0] != '-') {
631 			/* First get the root argument */
632 			i = 0;
633 			while (mdep[i] != '\0' && mdep[i] != ' ') {
634 				i++;
635 			}
636 
637 			if (i < 4 || strncmp(&mdep[i-4], "unix", 4) != 0) {
638 				/* mount point */
639 				bcopy(mdep, kern_bootpath, i);
640 				kern_bootpath[i] = '\0';
641 				*bootpath_len = i;
642 
643 				/*
644 				 * Get the next argument. It should be unix as
645 				 * we have validated in in halt.c.
646 				 */
647 				if (strlen(mdep) > i) {
648 					mdep += (i + 1);
649 					i = 0;
650 					while (mdep[i] != '\0' &&
651 					    mdep[i] != ' ') {
652 						i++;
653 					}
654 				}
655 
656 			}
657 			bcopy(mdep, kern_bootfile, i);
658 			kern_bootfile[i] = '\0';
659 			bcopy(mdep, bootargs, strlen(mdep));
660 		} else {
661 			int off = strlen(kern_bootfile);
662 			bcopy(kern_bootfile, bootargs, off);
663 			bcopy(" ", &bootargs[off++], 1);
664 			bcopy(mdep, &bootargs[off], strlen(mdep));
665 			off += strlen(mdep);
666 			bootargs[off] = '\0';
667 		}
668 	}
669 }
670 
671 /*
672  * Reserve memory under PA 1G for mapping the new kernel and boot archive.
673  * This function is only called if fastreboot_onpanic is *not* set.
674  */
675 static void
fastboot_reserve_mem(fastboot_info_t * nk)676 fastboot_reserve_mem(fastboot_info_t *nk)
677 {
678 	int i;
679 
680 	/*
681 	 * A valid kernel is in place.  No need to reserve any memory.
682 	 */
683 	if (nk->fi_valid)
684 		return;
685 
686 	/*
687 	 * Reserve memory under PA 1G for PTE lists.
688 	 */
689 	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
690 		fastboot_file_t *fb = &nk->fi_files[i];
691 		size_t fsize_roundup, size;
692 
693 		fsize_roundup = P2ROUNDUP_TYPED(saved_file_size[i],
694 		    PAGESIZE, size_t);
695 		size = FASTBOOT_PTE_LIST_SIZE(fsize_roundup);
696 		if ((fb->fb_pte_list_va = contig_alloc(size,
697 		    &fastboot_below_1G_dma_attr, PAGESIZE, 0)) == NULL) {
698 			return;
699 		}
700 		fb->fb_pte_list_size = size;
701 	}
702 
703 	/*
704 	 * Reserve memory under PA 1G for page tables.
705 	 */
706 	if ((nk->fi_pagetable_va =
707 	    (uintptr_t)contig_alloc(fastboot_pagetable_size,
708 	    &fastboot_below_1G_dma_attr, PAGESIZE, 0)) == NULL) {
709 		return;
710 	}
711 	nk->fi_pagetable_size = fastboot_pagetable_size;
712 
713 	/*
714 	 * Reserve memory under PA 1G for multiboot structure.
715 	 */
716 	if ((nk->fi_new_mbi_va = (uintptr_t)contig_alloc(fastboot_mbi_size,
717 	    &fastboot_below_1G_dma_attr, PAGESIZE, 0)) == NULL) {
718 		return;
719 	}
720 	nk->fi_mbi_size = fastboot_mbi_size;
721 }
722 
723 /*
724  * Calculate MD5 digest for the given fastboot_file.
725  * Assumes that the file is allready loaded properly.
726  */
727 static void
fastboot_cksum_file(fastboot_file_t * fb,uchar_t * md5_hash)728 fastboot_cksum_file(fastboot_file_t *fb, uchar_t *md5_hash)
729 {
730 	MD5_CTX md5_ctx;
731 
732 	MD5Init(&md5_ctx);
733 	MD5Update(&md5_ctx, (void *)fb->fb_va, fb->fb_size);
734 	MD5Final(md5_hash, &md5_ctx);
735 }
736 
737 /*
738  * Free up the memory we have allocated for a file
739  */
740 static void
fastboot_free_file(fastboot_file_t * fb)741 fastboot_free_file(fastboot_file_t *fb)
742 {
743 	size_t	fsize_roundup;
744 
745 	fsize_roundup = P2ROUNDUP_TYPED(fb->fb_size, PAGESIZE, size_t);
746 	if (fsize_roundup) {
747 		contig_free((void *)fb->fb_va, fsize_roundup);
748 		fb->fb_va = NULL;
749 		fb->fb_size = 0;
750 	}
751 }
752 
753 /*
754  * Free up memory used by the PTEs for a file.
755  */
756 static void
fastboot_free_file_pte(fastboot_file_t * fb,uint64_t endaddr)757 fastboot_free_file_pte(fastboot_file_t *fb, uint64_t endaddr)
758 {
759 	if (fb->fb_pte_list_size && fb->fb_pte_list_pa < endaddr) {
760 		contig_free((void *)fb->fb_pte_list_va, fb->fb_pte_list_size);
761 		fb->fb_pte_list_va = 0;
762 		fb->fb_pte_list_pa = 0;
763 		fb->fb_pte_list_size = 0;
764 	}
765 }
766 
767 /*
768  * Free up all the memory used for representing a kernel with
769  * fastboot_info_t.
770  */
771 static void
fastboot_free_mem(fastboot_info_t * nk,uint64_t endaddr)772 fastboot_free_mem(fastboot_info_t *nk, uint64_t endaddr)
773 {
774 	int i;
775 
776 	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
777 		fastboot_free_file(nk->fi_files + i);
778 		fastboot_free_file_pte(nk->fi_files + i, endaddr);
779 	}
780 
781 	if (nk->fi_pagetable_size && nk->fi_pagetable_pa < endaddr) {
782 		contig_free((void *)nk->fi_pagetable_va, nk->fi_pagetable_size);
783 		nk->fi_pagetable_va = 0;
784 		nk->fi_pagetable_pa = 0;
785 		nk->fi_pagetable_size = 0;
786 	}
787 
788 	if (nk->fi_mbi_size && nk->fi_new_mbi_pa < endaddr) {
789 		contig_free((void *)nk->fi_new_mbi_va, nk->fi_mbi_size);
790 		nk->fi_new_mbi_va = 0;
791 		nk->fi_new_mbi_pa = 0;
792 		nk->fi_mbi_size = 0;
793 	}
794 }
795 
796 /*
797  * Only free up the memory allocated for the kernel and boot archive,
798  * but not for the page tables.
799  */
800 void
fastboot_free_newkernel(fastboot_info_t * nk)801 fastboot_free_newkernel(fastboot_info_t *nk)
802 {
803 	int i;
804 
805 	nk->fi_valid = 0;
806 	/*
807 	 * Free the memory we have allocated
808 	 */
809 	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
810 		fastboot_free_file(&(nk->fi_files[i]));
811 	}
812 }
813 
814 static void
fastboot_cksum_cdata(fastboot_info_t * nk,uchar_t * md5_hash)815 fastboot_cksum_cdata(fastboot_info_t *nk, uchar_t *md5_hash)
816 {
817 	int i;
818 	MD5_CTX md5_ctx;
819 
820 	MD5Init(&md5_ctx);
821 	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
822 		MD5Update(&md5_ctx, nk->fi_files[i].fb_pte_list_va,
823 		    nk->fi_files[i].fb_pte_list_size);
824 	}
825 	MD5Update(&md5_ctx, (void *)nk->fi_pagetable_va, nk->fi_pagetable_size);
826 	MD5Update(&md5_ctx, (void *)nk->fi_new_mbi_va, nk->fi_mbi_size);
827 
828 	MD5Final(md5_hash, &md5_ctx);
829 }
830 
831 /*
832  * Generate MD5 checksum of the given kernel.
833  */
834 static void
fastboot_cksum_generate(fastboot_info_t * nk)835 fastboot_cksum_generate(fastboot_info_t *nk)
836 {
837 	int i;
838 
839 	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
840 		fastboot_cksum_file(nk->fi_files + i, nk->fi_md5_hash[i]);
841 	}
842 	fastboot_cksum_cdata(nk, nk->fi_md5_hash[i]);
843 }
844 
845 /*
846  * Calculate MD5 checksum of the given kernel and verify that
847  * it matches with what was calculated before.
848  */
849 int
fastboot_cksum_verify(fastboot_info_t * nk)850 fastboot_cksum_verify(fastboot_info_t *nk)
851 {
852 	int i;
853 	uchar_t md5_hash[MD5_DIGEST_LENGTH];
854 
855 	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
856 		fastboot_cksum_file(nk->fi_files + i, md5_hash);
857 		if (bcmp(nk->fi_md5_hash[i], md5_hash,
858 		    sizeof (nk->fi_md5_hash[i])) != 0)
859 			return (i + 1);
860 	}
861 
862 	fastboot_cksum_cdata(nk, md5_hash);
863 	if (bcmp(nk->fi_md5_hash[i], md5_hash,
864 	    sizeof (nk->fi_md5_hash[i])) != 0)
865 		return (i + 1);
866 
867 	return (0);
868 }
869 
870 /*
871  * This function performs the following tasks:
872  * - Read the sizes of the new kernel and boot archive.
873  * - Allocate memory for the new kernel and boot archive.
874  * - Allocate memory for page tables necessary for mapping the memory
875  *   allocated for the files.
876  * - Read the new kernel and boot archive into memory.
877  * - Map in the fast reboot switcher.
878  * - Load the fast reboot switcher to FASTBOOT_SWTCH_PA.
879  * - Build the new multiboot_info structure
880  * - Build page tables for the low 1G of physical memory.
881  * - Mark the data structure as valid if all steps have succeeded.
882  */
883 void
fastboot_load_kernel(char * mdep)884 fastboot_load_kernel(char *mdep)
885 {
886 	void		*buf = NULL;
887 	int		i;
888 	fastboot_file_t	*fb;
889 	uint32_t	dboot_start_offset;
890 	char		kern_bootpath[OBP_MAXPATHLEN];
891 	extern uintptr_t postbootkernelbase;
892 	uintptr_t	saved_kernelbase;
893 	int		bootpath_len = 0;
894 	int		is_failsafe = 0;
895 	int		is_retry = 0;
896 	uint64_t	end_addr;
897 
898 	if (!fastreboot_capable)
899 		return;
900 
901 	if (newkernel.fi_valid)
902 		fastboot_free_newkernel(&newkernel);
903 
904 	saved_kernelbase = postbootkernelbase;
905 
906 	postbootkernelbase = 0;
907 
908 	/*
909 	 * Initialize various HAT related fields in the data structure
910 	 */
911 	fastboot_init_fields(&newkernel);
912 
913 	bzero(kern_bootpath, OBP_MAXPATHLEN);
914 
915 	/*
916 	 * Process the boot argument
917 	 */
918 	bzero(fastboot_args, OBP_MAXPATHLEN);
919 	fastboot_parse_mdep(mdep, kern_bootpath, &bootpath_len, fastboot_args);
920 
921 	/*
922 	 * Make sure we get the null character
923 	 */
924 	bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_UNIX],
925 	    bootpath_len);
926 	bcopy(kern_bootfile,
927 	    &fastboot_filename[FASTBOOT_NAME_UNIX][bootpath_len],
928 	    strlen(kern_bootfile) + 1);
929 
930 	bcopy(kern_bootpath, fastboot_filename[FASTBOOT_NAME_BOOTARCHIVE],
931 	    bootpath_len);
932 
933 	if (bcmp(kern_bootfile, FAILSAFE_BOOTFILE32,
934 	    (sizeof (FAILSAFE_BOOTFILE32) - 1)) == 0 ||
935 	    bcmp(kern_bootfile, FAILSAFE_BOOTFILE64,
936 	    (sizeof (FAILSAFE_BOOTFILE64) - 1)) == 0) {
937 		is_failsafe = 1;
938 	}
939 
940 load_kernel_retry:
941 	/*
942 	 * Read in unix and boot_archive
943 	 */
944 	end_addr = DBOOT_ENTRY_ADDRESS;
945 	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
946 		struct _buf	*file;
947 		uintptr_t	va;
948 		uint64_t	fsize;
949 		size_t		fsize_roundup, pt_size;
950 		int		page_index;
951 		uintptr_t	offset;
952 		ddi_dma_attr_t dma_attr = fastboot_dma_attr;
953 
954 
955 		dprintf("fastboot_filename[%d] = %s\n",
956 		    i, fastboot_filename[i]);
957 
958 		if ((file = kobj_open_file(fastboot_filename[i])) ==
959 		    (struct _buf *)-1) {
960 			cmn_err(CE_NOTE, "!Fastboot: Couldn't open %s",
961 			    fastboot_filename[i]);
962 			goto err_out;
963 		}
964 
965 		if (kobj_get_filesize(file, &fsize) != 0) {
966 			cmn_err(CE_NOTE,
967 			    "!Fastboot: Couldn't get filesize for %s",
968 			    fastboot_filename[i]);
969 			goto err_out;
970 		}
971 
972 		fsize_roundup = P2ROUNDUP_TYPED(fsize, PAGESIZE, size_t);
973 
974 		/*
975 		 * Where the files end in physical memory after being
976 		 * relocated by the fast boot switcher.
977 		 */
978 		end_addr += fsize_roundup;
979 		if (end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_hi) {
980 			cmn_err(CE_NOTE, "!Fastboot: boot archive is too big");
981 			goto err_out;
982 		}
983 
984 		/*
985 		 * Adjust dma_attr_addr_lo so that the new kernel and boot
986 		 * archive will not be overridden during relocation.
987 		 */
988 		if (end_addr > fastboot_dma_attr.dma_attr_addr_lo ||
989 		    end_addr > fastboot_below_1G_dma_attr.dma_attr_addr_lo) {
990 
991 			if (is_retry) {
992 				/*
993 				 * If we have already tried and didn't succeed,
994 				 * just give up.
995 				 */
996 				cmn_err(CE_NOTE,
997 				    "!Fastboot: boot archive is too big");
998 				goto err_out;
999 			} else {
1000 				/* Set the flag so we don't keep retrying */
1001 				is_retry++;
1002 
1003 				/* Adjust dma_attr_addr_lo */
1004 				fastboot_dma_attr.dma_attr_addr_lo = end_addr;
1005 				fastboot_below_1G_dma_attr.dma_attr_addr_lo =
1006 				    end_addr;
1007 
1008 				/*
1009 				 * Free the memory we have already allocated
1010 				 * whose physical addresses might not fit
1011 				 * the new lo and hi constraints.
1012 				 */
1013 				fastboot_free_mem(&newkernel, end_addr);
1014 				goto load_kernel_retry;
1015 			}
1016 		}
1017 
1018 
1019 		if (!fastboot_contig)
1020 			dma_attr.dma_attr_sgllen = (fsize / PAGESIZE) +
1021 			    (((fsize % PAGESIZE) == 0) ? 0 : 1);
1022 
1023 		if ((buf = contig_alloc(fsize, &dma_attr, PAGESIZE, 0))
1024 		    == NULL) {
1025 			cmn_err(CE_NOTE, fastboot_enomem_msg, fsize, "64G");
1026 			goto err_out;
1027 		}
1028 
1029 		va = P2ROUNDUP_TYPED((uintptr_t)buf, PAGESIZE, uintptr_t);
1030 
1031 		if (kobj_read_file(file, (char *)va, fsize, 0) < 0) {
1032 			cmn_err(CE_NOTE, "!Fastboot: Couldn't read %s",
1033 			    fastboot_filename[i]);
1034 			goto err_out;
1035 		}
1036 
1037 		fb = &newkernel.fi_files[i];
1038 		fb->fb_va = va;
1039 		fb->fb_size = fsize;
1040 		fb->fb_sectcnt = 0;
1041 
1042 		pt_size = FASTBOOT_PTE_LIST_SIZE(fsize_roundup);
1043 
1044 		/*
1045 		 * If we have reserved memory but it is not enough, free it.
1046 		 */
1047 		if (fb->fb_pte_list_size && fb->fb_pte_list_size < pt_size) {
1048 			contig_free((void *)fb->fb_pte_list_va,
1049 			    fb->fb_pte_list_size);
1050 			fb->fb_pte_list_size = 0;
1051 		}
1052 
1053 		if (fb->fb_pte_list_size == 0) {
1054 			if ((fb->fb_pte_list_va =
1055 			    (x86pte_t *)contig_alloc(pt_size,
1056 			    &fastboot_below_1G_dma_attr, PAGESIZE, 0))
1057 			    == NULL) {
1058 				cmn_err(CE_NOTE, fastboot_enomem_msg,
1059 				    (uint64_t)pt_size, "1G");
1060 				goto err_out;
1061 			}
1062 			/*
1063 			 * fb_pte_list_size must be set after the allocation
1064 			 * succeeds as it's used to determine how much memory to
1065 			 * free.
1066 			 */
1067 			fb->fb_pte_list_size = pt_size;
1068 		}
1069 
1070 		bzero((void *)(fb->fb_pte_list_va), fb->fb_pte_list_size);
1071 
1072 		fb->fb_pte_list_pa = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
1073 		    (caddr_t)fb->fb_pte_list_va));
1074 
1075 		for (page_index = 0, offset = 0; offset < fb->fb_size;
1076 		    offset += PAGESIZE) {
1077 			uint64_t paddr;
1078 
1079 			paddr = mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
1080 			    (caddr_t)fb->fb_va + offset));
1081 
1082 			ASSERT(paddr >= fastboot_dma_attr.dma_attr_addr_lo);
1083 
1084 			/*
1085 			 * Include the pte_bits so we don't have to make
1086 			 * it in assembly.
1087 			 */
1088 			fb->fb_pte_list_va[page_index++] = (x86pte_t)
1089 			    (paddr | pte_bits);
1090 		}
1091 
1092 		fb->fb_pte_list_va[page_index] = FASTBOOT_TERMINATE;
1093 
1094 		if (i == FASTBOOT_UNIX) {
1095 			Ehdr	*ehdr = (Ehdr *)va;
1096 			int	j;
1097 
1098 			/*
1099 			 * Sanity checks:
1100 			 */
1101 			for (j = 0; j < SELFMAG; j++) {
1102 				if (ehdr->e_ident[j] != ELFMAG[j]) {
1103 					cmn_err(CE_NOTE, "!Fastboot: Bad ELF "
1104 					    "signature");
1105 					goto err_out;
1106 				}
1107 			}
1108 
1109 			if (ehdr->e_ident[EI_CLASS] == ELFCLASS32 &&
1110 			    ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
1111 			    ehdr->e_machine == EM_386) {
1112 
1113 				fb->fb_sectcnt = sizeof (fb->fb_sections) /
1114 				    sizeof (fb->fb_sections[0]);
1115 
1116 				if (fastboot_elf32_find_loadables((void *)va,
1117 				    fsize, &fb->fb_sections[0],
1118 				    &fb->fb_sectcnt, &dboot_start_offset) < 0) {
1119 					cmn_err(CE_NOTE, "!Fastboot: ELF32 "
1120 					    "program section failure");
1121 					goto err_out;
1122 				}
1123 
1124 				if (fb->fb_sectcnt == 0) {
1125 					cmn_err(CE_NOTE, "!Fastboot: No ELF32 "
1126 					    "program sections found");
1127 					goto err_out;
1128 				}
1129 
1130 				if (is_failsafe) {
1131 					/* Failsafe boot_archive */
1132 					bcopy(BOOTARCHIVE32_FAILSAFE,
1133 					    &fastboot_filename
1134 					    [FASTBOOT_NAME_BOOTARCHIVE]
1135 					    [bootpath_len],
1136 					    sizeof (BOOTARCHIVE32_FAILSAFE));
1137 				} else {
1138 					bcopy(BOOTARCHIVE32,
1139 					    &fastboot_filename
1140 					    [FASTBOOT_NAME_BOOTARCHIVE]
1141 					    [bootpath_len],
1142 					    sizeof (BOOTARCHIVE32));
1143 				}
1144 
1145 			} else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64 &&
1146 			    ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
1147 			    ehdr->e_machine == EM_AMD64) {
1148 
1149 				if (fastboot_elf64_find_dboot_load_offset(
1150 				    (void *)va, fsize, &dboot_start_offset)
1151 				    != 0) {
1152 					cmn_err(CE_NOTE, "!Fastboot: Couldn't "
1153 					    "find ELF64 dboot entry offset");
1154 					goto err_out;
1155 				}
1156 
1157 				if (!is_x86_feature(x86_featureset,
1158 				    X86FSET_64) ||
1159 				    !is_x86_feature(x86_featureset,
1160 				    X86FSET_PAE)) {
1161 					cmn_err(CE_NOTE, "Fastboot: Cannot "
1162 					    "reboot to %s: "
1163 					    "not a 64-bit capable system",
1164 					    kern_bootfile);
1165 					goto err_out;
1166 				}
1167 
1168 				if (is_failsafe) {
1169 					/* Failsafe boot_archive */
1170 					bcopy(BOOTARCHIVE64_FAILSAFE,
1171 					    &fastboot_filename
1172 					    [FASTBOOT_NAME_BOOTARCHIVE]
1173 					    [bootpath_len],
1174 					    sizeof (BOOTARCHIVE64_FAILSAFE));
1175 				} else {
1176 					bcopy(BOOTARCHIVE64,
1177 					    &fastboot_filename
1178 					    [FASTBOOT_NAME_BOOTARCHIVE]
1179 					    [bootpath_len],
1180 					    sizeof (BOOTARCHIVE64));
1181 				}
1182 			} else {
1183 				cmn_err(CE_NOTE, "!Fastboot: Unknown ELF type");
1184 				goto err_out;
1185 			}
1186 
1187 			fb->fb_dest_pa = DBOOT_ENTRY_ADDRESS -
1188 			    dboot_start_offset;
1189 
1190 			fb->fb_next_pa = DBOOT_ENTRY_ADDRESS + fsize_roundup;
1191 		} else {
1192 			fb->fb_dest_pa = newkernel.fi_files[i - 1].fb_next_pa;
1193 			fb->fb_next_pa = fb->fb_dest_pa + fsize_roundup;
1194 		}
1195 
1196 		kobj_close_file(file);
1197 
1198 	}
1199 
1200 	/*
1201 	 * Add the function that will switch us to 32-bit protected mode
1202 	 */
1203 	fb = &newkernel.fi_files[FASTBOOT_SWTCH];
1204 	fb->fb_va = fb->fb_dest_pa = FASTBOOT_SWTCH_PA;
1205 	fb->fb_size = MMU_PAGESIZE;
1206 
1207 	hat_devload(kas.a_hat, (caddr_t)fb->fb_va,
1208 	    MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
1209 	    PROT_READ | PROT_WRITE | PROT_EXEC,
1210 	    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
1211 
1212 	/*
1213 	 * Build the new multiboot_info structure
1214 	 */
1215 	if (fastboot_build_mbi(fastboot_args, &newkernel) != 0) {
1216 		goto err_out;
1217 	}
1218 
1219 	/*
1220 	 * Build page table for low 1G physical memory. Use big pages.
1221 	 * Allocate 4 (5 for amd64) pages for the page tables.
1222 	 *    1 page for PML4 (amd64)
1223 	 *    1 page for Page-Directory-Pointer Table
1224 	 *    2 pages for Page Directory
1225 	 *    1 page for Page Table.
1226 	 * The page table entry will be rewritten to map the physical
1227 	 * address as we do the copying.
1228 	 */
1229 	if (newkernel.fi_has_pae) {
1230 #ifdef	__amd64
1231 		size_t size = MMU_PAGESIZE * 5;
1232 #else
1233 		size_t size = MMU_PAGESIZE * 4;
1234 #endif	/* __amd64 */
1235 
1236 		if (newkernel.fi_pagetable_size && newkernel.fi_pagetable_size
1237 		    < size) {
1238 			contig_free((void *)newkernel.fi_pagetable_va,
1239 			    newkernel.fi_pagetable_size);
1240 			newkernel.fi_pagetable_size = 0;
1241 		}
1242 
1243 		if (newkernel.fi_pagetable_size == 0) {
1244 			if ((newkernel.fi_pagetable_va = (uintptr_t)
1245 			    contig_alloc(size, &fastboot_below_1G_dma_attr,
1246 			    MMU_PAGESIZE, 0)) == NULL) {
1247 				cmn_err(CE_NOTE, fastboot_enomem_msg,
1248 				    (uint64_t)size, "1G");
1249 				goto err_out;
1250 			}
1251 			/*
1252 			 * fi_pagetable_size must be set after the allocation
1253 			 * succeeds as it's used to determine how much memory to
1254 			 * free.
1255 			 */
1256 			newkernel.fi_pagetable_size = size;
1257 		}
1258 
1259 		bzero((void *)(newkernel.fi_pagetable_va), size);
1260 
1261 		newkernel.fi_pagetable_pa =
1262 		    mmu_ptob((uint64_t)hat_getpfnum(kas.a_hat,
1263 		    (caddr_t)newkernel.fi_pagetable_va));
1264 
1265 		newkernel.fi_last_table_pa = newkernel.fi_pagetable_pa +
1266 		    size - MMU_PAGESIZE;
1267 
1268 		newkernel.fi_next_table_va = newkernel.fi_pagetable_va +
1269 		    MMU_PAGESIZE;
1270 		newkernel.fi_next_table_pa = newkernel.fi_pagetable_pa +
1271 		    MMU_PAGESIZE;
1272 
1273 		fastboot_build_pagetables(&newkernel);
1274 	}
1275 
1276 
1277 	/* Generate MD5 checksums */
1278 	fastboot_cksum_generate(&newkernel);
1279 
1280 	/* Mark it as valid */
1281 	newkernel.fi_valid = 1;
1282 	newkernel.fi_magic = FASTBOOT_MAGIC;
1283 
1284 	postbootkernelbase = saved_kernelbase;
1285 	return;
1286 
1287 err_out:
1288 	postbootkernelbase = saved_kernelbase;
1289 	newkernel.fi_valid = 0;
1290 	fastboot_free_newkernel(&newkernel);
1291 }
1292 
1293 
1294 /* ARGSUSED */
1295 static int
fastboot_xc_func(fastboot_info_t * nk,xc_arg_t unused2,xc_arg_t unused3)1296 fastboot_xc_func(fastboot_info_t *nk, xc_arg_t unused2, xc_arg_t unused3)
1297 {
1298 	void (*fastboot_func)(fastboot_info_t *);
1299 	fastboot_file_t	*fb = &nk->fi_files[FASTBOOT_SWTCH];
1300 	fastboot_func = (void (*)())(fb->fb_va);
1301 	kthread_t *t_intr = curthread->t_intr;
1302 
1303 	if (&kas != curproc->p_as) {
1304 		hat_devload(curproc->p_as->a_hat, (caddr_t)fb->fb_va,
1305 		    MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
1306 		    PROT_READ | PROT_WRITE | PROT_EXEC,
1307 		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
1308 	}
1309 
1310 	/*
1311 	 * If we have pinned a thread, make sure the address is mapped
1312 	 * in the address space of the pinned thread.
1313 	 */
1314 	if (t_intr && t_intr->t_procp->p_as->a_hat != curproc->p_as->a_hat &&
1315 	    t_intr->t_procp->p_as != &kas)
1316 		hat_devload(t_intr->t_procp->p_as->a_hat, (caddr_t)fb->fb_va,
1317 		    MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
1318 		    PROT_READ | PROT_WRITE | PROT_EXEC,
1319 		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
1320 
1321 	(*psm_shutdownf)(A_SHUTDOWN, AD_FASTREBOOT);
1322 	(*fastboot_func)(nk);
1323 
1324 	/*NOTREACHED*/
1325 	return (0);
1326 }
1327 
1328 /*
1329  * Jump to the fast reboot switcher.  This function never returns.
1330  */
1331 void
fast_reboot()1332 fast_reboot()
1333 {
1334 	processorid_t bootcpuid = 0;
1335 	extern uintptr_t postbootkernelbase;
1336 	extern char	fb_swtch_image[];
1337 	fastboot_file_t	*fb;
1338 	int i;
1339 
1340 	postbootkernelbase = 0;
1341 
1342 	fb = &newkernel.fi_files[FASTBOOT_SWTCH];
1343 
1344 	/*
1345 	 * Map the address into both the current proc's address
1346 	 * space and the kernel's address space in case the panic
1347 	 * is forced by kmdb.
1348 	 */
1349 	if (&kas != curproc->p_as) {
1350 		hat_devload(curproc->p_as->a_hat, (caddr_t)fb->fb_va,
1351 		    MMU_PAGESIZE, mmu_btop(fb->fb_dest_pa),
1352 		    PROT_READ | PROT_WRITE | PROT_EXEC,
1353 		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
1354 	}
1355 
1356 	bcopy((void *)fb_swtch_image, (void *)fb->fb_va, fb->fb_size);
1357 
1358 
1359 	/*
1360 	 * Set fb_va to fake_va
1361 	 */
1362 	for (i = 0; i < FASTBOOT_MAX_FILES_MAP; i++) {
1363 		newkernel.fi_files[i].fb_va = fake_va;
1364 
1365 	}
1366 
1367 	if (panicstr && CPU->cpu_id != bootcpuid &&
1368 	    CPU_ACTIVE(cpu_get(bootcpuid))) {
1369 		extern void panic_idle(void);
1370 		cpuset_t cpuset;
1371 
1372 		CPUSET_ZERO(cpuset);
1373 		CPUSET_ADD(cpuset, bootcpuid);
1374 		xc_priority((xc_arg_t)&newkernel, 0, 0, CPUSET2BV(cpuset),
1375 		    (xc_func_t)fastboot_xc_func);
1376 
1377 		panic_idle();
1378 	} else
1379 		(void) fastboot_xc_func(&newkernel, 0, 0);
1380 }
1381 
1382 
1383 /*
1384  * Get boot property value for fastreboot_onpanic.
1385  *
1386  * NOTE: If fastreboot_onpanic is set to non-zero in /etc/system,
1387  * new setting passed in via "-B fastreboot_onpanic" is ignored.
1388  * This order of precedence is to enable developers debugging panics
1389  * that occur early in boot to utilize Fast Reboot on panic.
1390  */
1391 static void
fastboot_get_bootprop(void)1392 fastboot_get_bootprop(void)
1393 {
1394 	int		val = 0xaa, len, ret;
1395 	dev_info_t	*devi;
1396 	char		*propstr = NULL;
1397 
1398 	devi = ddi_root_node();
1399 
1400 	ret = ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
1401 	    FASTREBOOT_ONPANIC, &propstr);
1402 
1403 	if (ret == DDI_PROP_SUCCESS) {
1404 		if (FASTREBOOT_ONPANIC_NOTSET(propstr))
1405 			val = 0;
1406 		else if (FASTREBOOT_ONPANIC_ISSET(propstr))
1407 			val = UA_FASTREBOOT_ONPANIC;
1408 
1409 		/*
1410 		 * Only set fastreboot_onpanic to the value passed in
1411 		 * if it's not already set to non-zero, and the value
1412 		 * has indeed been passed in via command line.
1413 		 */
1414 		if (!fastreboot_onpanic && val != 0xaa)
1415 			fastreboot_onpanic = val;
1416 		ddi_prop_free(propstr);
1417 	} else if (ret != DDI_PROP_NOT_FOUND && ret != DDI_PROP_UNDEFINED) {
1418 		cmn_err(CE_NOTE, "!%s value is invalid, will be ignored",
1419 		    FASTREBOOT_ONPANIC);
1420 	}
1421 
1422 	len = sizeof (fastreboot_onpanic_cmdline);
1423 	ret = ddi_getlongprop_buf(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
1424 	    FASTREBOOT_ONPANIC_CMDLINE, fastreboot_onpanic_cmdline, &len);
1425 
1426 	if (ret == DDI_PROP_BUF_TOO_SMALL)
1427 		cmn_err(CE_NOTE, "!%s value is too long, will be ignored",
1428 		    FASTREBOOT_ONPANIC_CMDLINE);
1429 }
1430 
1431 /*
1432  * This function is called by main() to either load the backup kernel for panic
1433  * fast reboot, or to reserve low physical memory for fast reboot.
1434  */
1435 void
fastboot_post_startup()1436 fastboot_post_startup()
1437 {
1438 	lbolt_at_boot = ddi_get_lbolt();
1439 
1440 	/* Default to 10 minutes */
1441 	if (fastreboot_onpanic_uptime == LONG_MAX)
1442 		fastreboot_onpanic_uptime = SEC_TO_TICK(10 * 60);
1443 
1444 	if (!fastreboot_capable)
1445 		return;
1446 
1447 	mutex_enter(&fastreboot_config_mutex);
1448 
1449 	fastboot_get_bootprop();
1450 
1451 	if (fastreboot_onpanic)
1452 		fastboot_load_kernel(fastreboot_onpanic_cmdline);
1453 	else if (reserve_mem_enabled)
1454 		fastboot_reserve_mem(&newkernel);
1455 
1456 	mutex_exit(&fastreboot_config_mutex);
1457 }
1458 
1459 /*
1460  * Update boot configuration settings.
1461  * If the new fastreboot_onpanic setting is false, and a kernel has
1462  * been preloaded, free the memory;
1463  * if the new fastreboot_onpanic setting is true and newkernel is
1464  * not valid, load the new kernel.
1465  */
1466 void
fastboot_update_config(const char * mdep)1467 fastboot_update_config(const char *mdep)
1468 {
1469 	uint8_t boot_config = (uint8_t)*mdep;
1470 	int cur_fastreboot_onpanic;
1471 
1472 	if (!fastreboot_capable)
1473 		return;
1474 
1475 	mutex_enter(&fastreboot_config_mutex);
1476 
1477 	cur_fastreboot_onpanic = fastreboot_onpanic;
1478 	fastreboot_onpanic = boot_config & UA_FASTREBOOT_ONPANIC;
1479 
1480 	if (fastreboot_onpanic && (!cur_fastreboot_onpanic ||
1481 	    !newkernel.fi_valid))
1482 		fastboot_load_kernel(fastreboot_onpanic_cmdline);
1483 	if (cur_fastreboot_onpanic && !fastreboot_onpanic)
1484 		fastboot_free_newkernel(&newkernel);
1485 
1486 	mutex_exit(&fastreboot_config_mutex);
1487 }
1488 
1489 /*
1490  * This is an internal interface to disable Fast Reboot on Panic.
1491  * It frees up memory allocated for the backup kernel and sets
1492  * fastreboot_onpanic to zero.
1493  */
1494 static void
fastreboot_onpanic_disable(void)1495 fastreboot_onpanic_disable(void)
1496 {
1497 	uint8_t boot_config = (uint8_t)(~UA_FASTREBOOT_ONPANIC);
1498 	fastboot_update_config((const char *)&boot_config);
1499 }
1500 
1501 /*
1502  * This is the interface to be called by fm_panic() in case FMA has diagnosed
1503  * a terminal machine check exception.  It does not free up memory allocated
1504  * for the backup kernel.  General disabling fastreboot_onpanic in a
1505  * non-panicking situation must go through fastboot_onpanic_disable().
1506  */
1507 void
fastreboot_disable_highpil(void)1508 fastreboot_disable_highpil(void)
1509 {
1510 	fastreboot_onpanic = 0;
1511 }
1512 
1513 /*
1514  * This is an internal interface to disable Fast Reboot by Default.
1515  * It does not free up memory allocated for the backup kernel.
1516  */
1517 static void
fastreboot_capable_disable(uint32_t msgid)1518 fastreboot_capable_disable(uint32_t msgid)
1519 {
1520 	if (fastreboot_capable != 0) {
1521 		fastreboot_capable = 0;
1522 		if (msgid < sizeof (fastreboot_nosup_desc) /
1523 		    sizeof (fastreboot_nosup_desc[0]))
1524 			fastreboot_nosup_id = msgid;
1525 		else
1526 			fastreboot_nosup_id = FBNS_DEFAULT;
1527 	}
1528 }
1529 
/*
 * Kernel interface for turning off both Fast Reboot by Default and
 * Fast Reboot on Panic; msgid identifies the reason.
 * Frees up memory allocated for the backup kernel.
 * General disabling of the Fast Reboot by Default feature should be done
 * via the userland interface scf_fastreboot_default_set_transient().
 */
void
fastreboot_disable(uint32_t msgid)
{
	/* Record the reason first, then drop the on-panic configuration. */
	fastreboot_capable_disable(msgid);
	fastreboot_onpanic_disable();
}
1543 
1544 /*
1545  * Returns Fast Reboot not support message for fastreboot_nosup_id.
1546  * If fastreboot_nosup_id contains invalid index, default
1547  * Fast Reboot not support message is returned.
1548  */
1549 const char *
fastreboot_nosup_message(void)1550 fastreboot_nosup_message(void)
1551 {
1552 	uint32_t msgid;
1553 
1554 	msgid = fastreboot_nosup_id;
1555 	if (msgid >= sizeof (fastreboot_nosup_desc) /
1556 	    sizeof (fastreboot_nosup_desc[0]))
1557 		msgid = FBNS_DEFAULT;
1558 
1559 	return (fastreboot_nosup_desc[msgid]);
1560 }
1561 
1562 /*
1563  * A simplified interface for uadmin to call to update the configuration
1564  * setting and load a new kernel if necessary.
1565  */
1566 void
fastboot_update_and_load(int fcn,char * mdep)1567 fastboot_update_and_load(int fcn, char *mdep)
1568 {
1569 	if (fcn != AD_FASTREBOOT) {
1570 		/*
1571 		 * If user has explicitly requested reboot to prom,
1572 		 * or uadmin(1M) was invoked with other functions,
1573 		 * don't try to fast reboot after dumping.
1574 		 */
1575 		fastreboot_onpanic_disable();
1576 	}
1577 
1578 	mutex_enter(&fastreboot_config_mutex);
1579 
1580 	if (fastreboot_onpanic)
1581 		fastboot_load_kernel(mdep);
1582 
1583 	mutex_exit(&fastreboot_config_mutex);
1584 }
1585