1 /*-
2 * Copyright (C) 2010-2014 Nathan Whitehorn
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
18 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
22 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
23 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include <stand.h>
27 #include <sys/param.h>
28 #include <sys/boot.h>
29 #ifdef LOADER_FDT_SUPPORT
30 #include <fdt_platform.h>
31 #endif
32
33 #include <machine/cpufunc.h>
34 #include <bootstrap.h>
35 #include "host_syscall.h"
36 #include "kboot.h"
37 #include "stand.h"
38 #include <smbios.h>
39
40 int kboot_getdev(void **vdev, const char *devspec, const char **path);
41 ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len);
42 ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len);
43 ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len);
44 int kboot_autoload(void);
45 static void kboot_zfs_probe(void);
46
47 struct arch_switch archsw = {
48 .arch_getdev = kboot_getdev,
49 .arch_copyin = kboot_copyin,
50 .arch_copyout = kboot_copyout,
51 .arch_readin = kboot_readin,
52 .arch_autoload = kboot_autoload,
53 .arch_zfs_probe = kboot_zfs_probe,
54 };
55
/* Real "fdt" command handler; wrapped by command_fdt() when FDT is enabled. */
extern int	command_fdt_internal(int argc, char *argv[]);
57
/*
 * On amd64, KERNSTART is where the first actual kernel page is mapped, after
 * the compatibility mapping.  2MB at the start of the address space is
 * reserved for the page tables, etc, so the load address has to be offset by
 * that amount there (and only there).  The loader must know about this so it
 * can pad everything to the proper place in PA.  Ideally vmparam.h would
 * supply the value, but the macros it relies on are not easily available in
 * this compile environment, so the knowledge is hard coded here.
 */
#ifdef __amd64__
#define	KERN_PADDING	(2 << 20)
#else
#define	KERN_PADDING	0
#endif
72
73 #define PA_INVAL (vm_offset_t)-1
74 static vm_offset_t pa_start = PA_INVAL;
75 static vm_offset_t offset;
76
77 static uint64_t commit_limit;
78 static uint64_t committed_as;
79 static uint64_t mem_avail;
80
81 static void
memory_limits(void)82 memory_limits(void)
83 {
84 int fd;
85 char buf[128];
86
87 /*
88 * To properly size the slabs, we need to find how much memory we can
89 * commit to using. commit_limit is the max, while commited_as is the
90 * current total. We can use these later to allocate the largetst amount
91 * of memory possible so we can support larger ram disks than we could
92 * by using fixed segment sizes. We also grab the memory available so
93 * we don't use more than 49% of that.
94 */
95 fd = open("host:/proc/meminfo", O_RDONLY);
96 if (fd != -1) {
97 while (fgetstr(buf, sizeof(buf), fd) > 0) {
98 if (strncmp(buf, "MemAvailable:", 13) == 0) {
99 mem_avail = strtoll(buf + 13, NULL, 0);
100 mem_avail <<= 10; /* Units are kB */
101 } else if (strncmp(buf, "CommitLimit:", 12) == 0) {
102 commit_limit = strtoll(buf + 13, NULL, 0);
103 commit_limit <<= 10; /* Units are kB */
104 } else if (strncmp(buf, "Committed_AS:", 13) == 0) {
105 committed_as = strtoll(buf + 14, NULL, 0);
106 committed_as <<= 10; /* Units are kB */
107 }
108 }
109 } else {
110 /* Otherwise, on FreeBSD host, for testing 32GB host: */
111 mem_avail = 31ul << 30; /* 31GB free */
112 commit_limit = mem_avail * 9 / 10; /* 90% comittable */
113 committed_as = 20ul << 20; /* 20MB used */
114 }
115 printf("Commit limit: %lld Committed bytes %lld Available %lld\n",
116 (long long)commit_limit, (long long)committed_as,
117 (long long)mem_avail);
118 close(fd);
119 }
120
/*
 * Resolve a device specification into a devdesc (arch_getdev hook).
 *
 * A devspec that is NULL or has no ':' is treated as a bare path on the
 * current device: "currdev" is parsed and, on success, *path (if requested)
 * is set to devspec itself.  Anything else is handed to devparse(), which
 * splits the device name off the front and reports the remainder via path.
 * Returns whatever devparse() returned.
 *
 * NB: getdev should likely be identical to this most places, except maybe
 * we should move to storing the length of the platform devdesc.
 */
int
kboot_getdev(void **vdev, const char *devspec, const char **path)
{
	struct devdesc **devp = (struct devdesc **)vdev;
	int error;

	/* A real device name is present: let devparse() peel it off. */
	if (devspec != NULL && strchr(devspec, ':') != NULL)
		return (devparse(devp, devspec, path));

	/* Just a path (or nothing at all): go with the current device. */
	error = devparse(devp, getenv("currdev"), NULL);
	if (error == 0 && path != NULL)
		*path = devspec;
	return (error);
}
147
/*
 * Feed the command-line arguments to boot_parse_arg() and return the OR of
 * the howto flags it reports.
 *
 * When run as init, sometimes argv[0] is an EFI-ESP path, other times it's
 * the name of the init program, and sometimes it's a placeholder string, so
 * it is always skipped.  For the remaining args, anything that looks like a
 * DOS-like or Unix-like absolute path (leading '\' or '/') is skipped too;
 * everything else is parsed as a command arg ('-X', 'foo' or 'foo=bar').
 * This differs a little from EFI, where argv[0] is often the first argument
 * passed in.  There are cases when linux-booting via EFI where the EFI path
 * used to run bootXXX.efi shows up among the arguments to init, which is
 * why paths are excluded here as well.
 */
static int
parse_args(int argc, const char **argv)
{
	int flags = 0;

	for (int n = 1; n < argc; n++) {
		const char *arg = argv[n];

		if (arg[0] == '\\' || arg[0] == '/')
			continue;
		flags |= boot_parse_arg(arg);
	}

	return (flags);
}
173
174 static vm_offset_t rsdp;
175
176 static vm_offset_t
kboot_rsdp_from_efi(void)177 kboot_rsdp_from_efi(void)
178 {
179 char buffer[512 + 1];
180 char *walker, *ep;
181
182 if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
183 return (0); /* Not an EFI system */
184 ep = buffer + strlen(buffer);
185 walker = buffer;
186 while (walker < ep) {
187 if (strncmp("ACPI20=", walker, 7) == 0)
188 return((vm_offset_t)strtoull(walker + 7, NULL, 0));
189 if (strncmp("ACPI=", walker, 5) == 0)
190 return((vm_offset_t)strtoull(walker + 5, NULL, 0));
191 walker += strcspn(walker, "\n") + 1;
192 }
193 return (0);
194 }
195
196 static void
find_acpi(void)197 find_acpi(void)
198 {
199 rsdp = kboot_rsdp_from_efi();
200 #if 0 /* maybe for amd64 */
201 if (rsdp == 0)
202 rsdp = find_rsdp_arch();
203 #endif
204 }
205
206 vm_offset_t
acpi_rsdp(void)207 acpi_rsdp(void)
208 {
209 return (rsdp);
210 }
211
212 bool
has_acpi(void)213 has_acpi(void)
214 {
215 return rsdp != 0;
216 }
217
218 /*
219 * SMBIOS support. We map the physical memory address we get into a VA in this
220 * address space with mmap with 64k pages. Once we're done, we cleanup any
221 * mappings we made.
222 */
223
224 #define MAX_MAP 10
225 #define PAGE (64<<10)
226
227 static struct mapping
228 {
229 uintptr_t pa;
230 caddr_t va;
231 } map[MAX_MAP];
232 static bool smbios_mmap_file;
233 static int smbios_fd;
234 static int nmap;
235
ptov(uintptr_t pa)236 caddr_t ptov(uintptr_t pa)
237 {
238 caddr_t va;
239 uintptr_t pa2;
240 struct mapping *m = map;
241
242 if (smbios_mmap_file)
243 pa2 = rounddown(pa, PAGE);
244 else
245 pa2 = pa;
246 for (int i = 0; i < nmap; i++, m++) {
247 if (m->pa == pa2) {
248 return (m->va + pa - m->pa);
249 }
250 }
251 if (!smbios_mmap_file)
252 panic("Out of bounds smbios access");
253 if (nmap == MAX_MAP)
254 panic("Too many maps for smbios");
255
256 /*
257 * host_mmap returns small negative numbers on errors, can't return an
258 * error here, so we have to panic. The Linux wrapper will set errno
259 * based on this and then return HOST_MAP_FAILED. Since we're calling
260 * the raw system call we have to do that ourselves.
261 */
262 va = host_mmap(0, PAGE, HOST_PROT_READ, HOST_MAP_SHARED, smbios_fd, pa2);
263 if (is_linux_error((long)va))
264 panic("smbios mmap offset %#jx failed", (uintmax_t)pa2);
265 m = &map[nmap++];
266 m->pa = pa2;
267 m->va = va;
268 return (m->va + pa - m->pa);
269 }
270
271 static void
smbios_cleanup(void)272 smbios_cleanup(void)
273 {
274 for (int i = 0; i < nmap; i++) {
275 host_munmap(map[i].va, PAGE);
276 }
277 }
278
279 static vm_offset_t
kboot_find_smbios(void)280 kboot_find_smbios(void)
281 {
282 char buffer[512 + 1];
283 char *walker, *ep;
284
285 if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
286 return (0); /* Not an EFI system */
287 ep = buffer + strlen(buffer);
288 walker = buffer;
289 while (walker <= ep) {
290 /*
291 * Linux outputs the v3 table first if present, so we will
292 * choose it in priority.
293 */
294 if (strncmp("SMBIOS3=", walker, 8) == 0)
295 return((vm_offset_t)strtoull(walker + 8, NULL, 0));
296 if (strncmp("SMBIOS=", walker, 7) == 0)
297 return((vm_offset_t)strtoull(walker + 7, NULL, 0));
298 walker += strcspn(walker, "\n") + 1;
299 }
300 return (0);
301 }
302
303 static void
find_smbios(void)304 find_smbios(void)
305 {
306 char buf[40];
307 void *dmi_data;
308 uintptr_t pa;
309 caddr_t va;
310
311 pa = kboot_find_smbios();
312 printf("SMBIOS at %#jx\n", (uintmax_t)pa);
313 if (pa == 0)
314 return;
315
316 dmi_data = NULL;
317 smbios_fd = host_open("/sys/firmware/dmi/tables/DMI", O_RDONLY, 0);
318 if (smbios_fd >= 0) {
319 struct host_kstat sb;
320 struct mapping *m;
321
322 if (host_fstat(smbios_fd, &sb) < 0) {
323 host_close(smbios_fd);
324 goto try_dev_mem;
325 }
326
327 dmi_data = malloc(sb.st_size);
328 if (dmi_data == NULL) {
329 host_close(smbios_fd);
330 goto try_dev_mem;
331 }
332
333 host_read(smbios_fd, dmi_data, sb.st_size);
334
335 m = &map[nmap++];
336 m->pa = pa;
337 m->va = dmi_data;
338 smbios_mmap_file = false;
339 } else {
340 try_dev_mem:
341 smbios_fd = host_open("/dev/mem", O_RDONLY, 0);
342 if (smbios_fd < 0) {
343 printf("Can't open /sys/firmware/dmi/tables/DMI or "
344 "/dev/mem to read smbios\n");
345 return;
346 }
347 smbios_mmap_file = true;
348 }
349 snprintf(buf, sizeof(buf), "%#jx", (uintmax_t)pa);
350 setenv("hint.smbios.0.mem", buf, 1);
351
352 va = ptov(pa);
353 printf("Start of smbios at pa %p va %p\n", (void *)pa, va);
354 smbios_detect(va);
355 smbios_cleanup();
356 free(dmi_data);
357 host_close(smbios_fd);
358 }
359
/*
 * Read the file 'fn' in full and pass its contents, NUL-terminated, to
 * boot_parse_cmdline().  Nothing happens if the file cannot be stat'ed,
 * opened, buffered or read completely.
 */
static void
parse_file(const char *fn)
{
	struct stat sb;
	char *text;
	int fd;

	if (stat(fn, &sb) != 0)
		return;
	fd = open(fn, O_RDONLY);
	if (fd == -1)
		return;

	/* One extra byte for the terminator appended below. */
	text = malloc(sb.st_size + 1);
	if (text != NULL && read(fd, text, sb.st_size) == sb.st_size) {
		text[sb.st_size] = '\0';
		boot_parse_cmdline(text);
	}

	free(text);
	close(fd);
}
383
384
385 int
main(int argc,const char ** argv)386 main(int argc, const char **argv)
387 {
388 void *heapbase;
389 const size_t heapsize = 64*1024*1024;
390 const char *bootdev;
391
392 /* Give us a sane world if we're running as init */
393 do_init();
394
395 /*
396 * Setup the heap, 64MB is minimum for ZFS booting
397 */
398 heapbase = host_getmem(heapsize);
399 setheap(heapbase, heapbase + heapsize);
400
401 /*
402 * Set up console so we get error messages.
403 */
404 cons_probe();
405
406 /*
407 * Find acpi and smbios, if they exists. This allows command line and
408 * later scripts to override if necessary.
409 */
410 find_acpi();
411 find_smbios();
412
413 /* Parse the command line args -- ignoring for now the console selection */
414 parse_args(argc, argv);
415
416 hostfs_root = getenv("hostfs_root");
417 if (hostfs_root == NULL)
418 hostfs_root = "/";
419
420 /* Initialize all the devices */
421 devinit();
422
423 /* Figure out where we're booting from */
424 bootdev = getenv("bootdev");
425 if (bootdev == NULL)
426 bootdev = hostdisk_gen_probe();
427 #if defined(LOADER_ZFS_SUPPORT)
428 if (bootdev == NULL || strcmp(bootdev, "zfs:") == 0) {
429 /*
430 * Pseudo device that says go find the right ZFS pool. This will be
431 * the first pool that we find that passes the sanity checks (eg looks
432 * like it might be vbootable) and sets currdev to the right thing based
433 * on active BEs, etc
434 */
435 if (hostdisk_zfs_find_default())
436 bootdev = getenv("currdev");
437 }
438 #endif
439 if (bootdev == NULL)
440 bootdev = "host:/";
441 if (bootdev != NULL) {
442 /*
443 * Otherwise, honor what's on the command line. If we've been
444 * given a specific ZFS partition, then we'll honor it w/o BE
445 * processing that would otherwise pick a different snapshot to
446 * boot than the default one in the pool.
447 */
448 set_currdev(bootdev);
449 } else {
450 panic("Bootdev is still NULL");
451 }
452
453 printf("Boot device: %s with hostfs_root %s\n", bootdev, hostfs_root);
454
455 printf("\n%s", bootprog_info);
456
457 setenv("LINES", "24", 1);
458
459 memory_limits();
460 enumerate_memory_arch();
461
462 interact(); /* doesn't return */
463
464 return (0);
465 }
466
/* Terminate the loader by asking the host to exit with status 'code'. */
void
exit(int code)
{

	host_exit(code);
	/* host_exit() is not expected to come back. */
	__unreachable();
}
473
474 void
delay(int usecs)475 delay(int usecs)
476 {
477 struct host_timeval tvi, tv;
478 uint64_t ti, t;
479 host_gettimeofday(&tvi, NULL);
480 ti = tvi.tv_sec*1000000 + tvi.tv_usec;
481 do {
482 host_gettimeofday(&tv, NULL);
483 t = tv.tv_sec*1000000 + tv.tv_usec;
484 } while (t < ti + usecs);
485 }
486
487 time_t
getsecs(void)488 getsecs(void)
489 {
490 struct host_timeval tv;
491 host_gettimeofday(&tv, NULL);
492 return (tv.tv_sec);
493 }
494
/*
 * time(3) replacement: return getsecs() and, when 'tloc' is not NULL, store
 * the same value through it.
 */
time_t
time(time_t *tloc)
{
	time_t now = getsecs();

	if (tloc != NULL)
		*tloc = now;
	return (now);
}
506
507 struct host_kexec_segment loaded_segments[HOST_KEXEC_SEGMENT_MAX];
508 int nkexec_segments = 0;
509
510 #define SEGALIGN (1ul<<20)
511
512 static ssize_t
get_phys_buffer(vm_offset_t dest,const size_t len,void ** buf)513 get_phys_buffer(vm_offset_t dest, const size_t len, void **buf)
514 {
515 int i = 0;
516 const size_t segsize = 64*1024*1024;
517 size_t sz, amt, l;
518
519 if (nkexec_segments == HOST_KEXEC_SEGMENT_MAX)
520 panic("Tried to load too many kexec segments");
521 for (i = 0; i < nkexec_segments; i++) {
522 if (dest >= (vm_offset_t)loaded_segments[i].mem &&
523 dest < (vm_offset_t)loaded_segments[i].mem +
524 loaded_segments[i].bufsz) /* Need to use bufsz since memsz is in use size */
525 goto out;
526 }
527
528 sz = segsize;
529 if (nkexec_segments == 0) {
530 /* how much space does this segment have */
531 sz = space_avail(dest);
532 /* Clip to 45% of available memory (need 2 copies) */
533 sz = MIN(sz, rounddown2(mem_avail * 45 / 100, SEGALIGN));
534 printf("limit to 45%% of mem_avail %zd\n", sz);
535 /* And only use 95% of what we can allocate */
536 sz = MIN(sz,
537 rounddown2((commit_limit - committed_as) * 95 / 100, SEGALIGN));
538 printf("Allocating %zd MB for first segment\n", sz >> 20);
539 }
540
541 loaded_segments[nkexec_segments].buf = host_getmem(sz);
542 loaded_segments[nkexec_segments].bufsz = sz;
543 loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,SEGALIGN);
544 loaded_segments[nkexec_segments].memsz = 0;
545
546 i = nkexec_segments;
547 nkexec_segments++;
548
549 out:
550 /*
551 * Keep track of the highest amount used in a segment
552 */
553 amt = dest - (vm_offset_t)loaded_segments[i].mem;
554 l = min(len,loaded_segments[i].bufsz - amt);
555 *buf = loaded_segments[i].buf + amt;
556 if (amt + l > loaded_segments[i].memsz)
557 loaded_segments[i].memsz = amt + l;
558 return (l);
559 }
560
561 ssize_t
kboot_copyin(const void * src,vm_offset_t dest,const size_t len)562 kboot_copyin(const void *src, vm_offset_t dest, const size_t len)
563 {
564 ssize_t segsize, remainder;
565 void *destbuf;
566
567 if (pa_start == PA_INVAL) {
568 pa_start = kboot_get_phys_load_segment();
569 offset = dest;
570 get_phys_buffer(pa_start, len, &destbuf);
571 }
572
573 remainder = len;
574 do {
575 segsize = get_phys_buffer(dest + pa_start + KERN_PADDING - offset, remainder, &destbuf);
576 bcopy(src, destbuf, segsize);
577 remainder -= segsize;
578 src += segsize;
579 dest += segsize;
580 } while (remainder > 0);
581
582 return (len);
583 }
584
585 ssize_t
kboot_copyout(vm_offset_t src,void * dest,const size_t len)586 kboot_copyout(vm_offset_t src, void *dest, const size_t len)
587 {
588 ssize_t segsize, remainder;
589 void *srcbuf;
590
591 remainder = len;
592 do {
593 segsize = get_phys_buffer(src + pa_start + KERN_PADDING - offset, remainder, &srcbuf);
594 bcopy(srcbuf, dest, segsize);
595 remainder -= segsize;
596 src += segsize;
597 dest += segsize;
598 } while (remainder > 0);
599
600 return (len);
601 }
602
603 ssize_t
kboot_readin(readin_handle_t fd,vm_offset_t dest,const size_t len)604 kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
605 {
606 void *buf;
607 size_t resid, chunk, get;
608 ssize_t got;
609 vm_offset_t p;
610
611 p = dest;
612
613 chunk = min(PAGE_SIZE, len);
614 buf = malloc(chunk);
615 if (buf == NULL) {
616 printf("kboot_readin: buf malloc failed\n");
617 return (0);
618 }
619
620 for (resid = len; resid > 0; resid -= got, p += got) {
621 get = min(chunk, resid);
622 got = VECTX_READ(fd, buf, get);
623 if (got <= 0) {
624 if (got < 0)
625 printf("kboot_readin: read failed\n");
626 break;
627 }
628
629 kboot_copyin(buf, p, got);
630 }
631
632 free (buf);
633 return (len - resid);
634 }
635
/* arch_autoload hook: there is nothing to autoload, so always succeed (0). */
int
kboot_autoload(void)
{
	const int ok = 0;

	return (ok);
}
642
643 void
kboot_kseg_get(int * nseg,void ** ptr)644 kboot_kseg_get(int *nseg, void **ptr)
645 {
646 printf("kseg_get: %d segments\n", nkexec_segments);
647 printf("VA SZ PA MEMSZ\n");
648 printf("---------------- -------- ---------------- -----\n");
649 for (int a = 0; a < nkexec_segments; a++) {
650 /*
651 * Truncate each segment to just what we've used in the segment,
652 * rounded up to the next page.
653 */
654 loaded_segments[a].memsz = roundup2(loaded_segments[a].memsz,PAGE_SIZE);
655 loaded_segments[a].bufsz = loaded_segments[a].memsz;
656 printf("%016jx %08jx %016jx %08jx\n",
657 (uintmax_t)loaded_segments[a].buf,
658 (uintmax_t)loaded_segments[a].bufsz,
659 (uintmax_t)loaded_segments[a].mem,
660 (uintmax_t)loaded_segments[a].memsz);
661 }
662
663 *nseg = nkexec_segments;
664 *ptr = &loaded_segments[0];
665 }
666
/*
 * arch_zfs_probe hook.  With ZFS support compiled in, open all the disks and
 * partitions we can find to see if there are ZFS pools on them; otherwise do
 * nothing.
 */
static void
kboot_zfs_probe(void)
{
#ifdef LOADER_ZFS_SUPPORT
	hostdisk_zfs_probe();
#endif
}
678
#ifdef LOADER_FDT_SUPPORT
/*
 * The real fdt command handler lives in fdt_loader_cmd.c.  Declaring it
 * extern conflicts with the COMMAND_SET() macro (which uses a static
 * pointer), so this local wrapper is registered instead and simply forwards
 * to the proper fdt handling routine.
 */
static int
command_fdt(int argc, char *argv[])
{
	int rv;

	rv = command_fdt_internal(argc, argv);
	return (rv);
}

COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt);
#endif
695
696 /*
697 * Support quitting.
698 */
699 static int
command_quit(int argc,char * argv[])700 command_quit(int argc, char *argv[])
701 {
702 exit(0);
703 return (CMD_OK);
704 }
705
706 COMMAND_SET(quit, "quit", "exit the program", command_quit);
707