/*
 * Handle the memory map.
 * The functions here do the job until bootmem takes over.
 *
 *  Getting sanitize_e820_map() in sync with i386 version by applying change:
 *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
 *     Alex Achenbach <xela@slit.de>, December 2002.
 *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/crash_dump.h>
#include <linux/export.h>
#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/acpi.h>
#include <linux/firmware-map.h>
#include <linux/memblock.h>
#include <linux/sort.h>

#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/cpufeature.h>

/*
 * The e820 map is the map that gets modified e.g. with command line parameters
 * and that is also registered with modifications in the kernel resource tree
 * with the iomem_resource as parent.
 *
 * The e820_saved is directly saved after the BIOS-provided memory map is
 * copied. It doesn't get modified afterwards. It's registered for the
 * /sys/firmware/memmap interface.
 *
 * That memory map is not modified and is used as base for kexec. The kexec'd
 * kernel should get the same memory map as the firmware provides. Then the
 * user can e.g. boot the original kernel with mem=1G while still booting the
 * next kernel with full memory.
 *
 * initial_e820 and initial_e820_saved are the boot-time (__initdata) backing
 * storage. The e820 and e820_saved pointers start out aimed at them and are
 * re-pointed at kmalloc'ed copies by e820_reallocate_tables().
 */
static struct e820map initial_e820  __initdata;
static struct e820map initial_e820_saved  __initdata;
struct e820map *e820 __refdata = &initial_e820;
struct e820map *e820_saved __refdata = &initial_e820_saved;

/*
 * For PCI or other memory-mapped resources.
 * e820_setup_gap() overwrites this with the start of the gap it selects.
 * NOTE(review): 0xaeedbabe looks like a recognisable "not set yet"
 * placeholder rather than a meaningful address -- confirm.
 */
unsigned long pci_mem_start = 0xaeedbabe;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif

/*
 * This function checks if any part of the range <start,end> is mapped
 * with type.
57 */ 58 int 59 e820_any_mapped(u64 start, u64 end, unsigned type) 60 { 61 int i; 62 63 for (i = 0; i < e820->nr_map; i++) { 64 struct e820entry *ei = &e820->map[i]; 65 66 if (type && ei->type != type) 67 continue; 68 if (ei->addr >= end || ei->addr + ei->size <= start) 69 continue; 70 return 1; 71 } 72 return 0; 73 } 74 EXPORT_SYMBOL_GPL(e820_any_mapped); 75 76 /* 77 * This function checks if the entire range <start,end> is mapped with type. 78 * 79 * Note: this function only works correct if the e820 table is sorted and 80 * not-overlapping, which is the case 81 */ 82 int __init e820_all_mapped(u64 start, u64 end, unsigned type) 83 { 84 int i; 85 86 for (i = 0; i < e820->nr_map; i++) { 87 struct e820entry *ei = &e820->map[i]; 88 89 if (type && ei->type != type) 90 continue; 91 /* is the region (part) in overlap with the current region ?*/ 92 if (ei->addr >= end || ei->addr + ei->size <= start) 93 continue; 94 95 /* if the region is at the beginning of <start,end> we move 96 * start to the end of the region since it's ok until there 97 */ 98 if (ei->addr <= start) 99 start = ei->addr + ei->size; 100 /* 101 * if start is now at or beyond end, we're done, full 102 * coverage 103 */ 104 if (start >= end) 105 return 1; 106 } 107 return 0; 108 } 109 110 /* 111 * Add a memory region to the kernel e820 map. 
112 */ 113 static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, 114 int type) 115 { 116 int x = e820x->nr_map; 117 118 if (x >= ARRAY_SIZE(e820x->map)) { 119 printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", 120 (unsigned long long) start, 121 (unsigned long long) (start + size - 1)); 122 return; 123 } 124 125 e820x->map[x].addr = start; 126 e820x->map[x].size = size; 127 e820x->map[x].type = type; 128 e820x->nr_map++; 129 } 130 131 void __init e820_add_region(u64 start, u64 size, int type) 132 { 133 __e820_add_region(e820, start, size, type); 134 } 135 136 static void __init e820_print_type(u32 type) 137 { 138 switch (type) { 139 case E820_RAM: 140 case E820_RESERVED_KERN: 141 printk(KERN_CONT "usable"); 142 break; 143 case E820_RESERVED: 144 printk(KERN_CONT "reserved"); 145 break; 146 case E820_ACPI: 147 printk(KERN_CONT "ACPI data"); 148 break; 149 case E820_NVS: 150 printk(KERN_CONT "ACPI NVS"); 151 break; 152 case E820_UNUSABLE: 153 printk(KERN_CONT "unusable"); 154 break; 155 case E820_PMEM: 156 case E820_PRAM: 157 printk(KERN_CONT "persistent (type %u)", type); 158 break; 159 default: 160 printk(KERN_CONT "type %u", type); 161 break; 162 } 163 } 164 165 void __init e820_print_map(char *who) 166 { 167 int i; 168 169 for (i = 0; i < e820->nr_map; i++) { 170 printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who, 171 (unsigned long long) e820->map[i].addr, 172 (unsigned long long) 173 (e820->map[i].addr + e820->map[i].size - 1)); 174 e820_print_type(e820->map[i].type); 175 printk(KERN_CONT "\n"); 176 } 177 } 178 179 /* 180 * Sanitize the BIOS e820 map. 181 * 182 * Some e820 responses include overlapping entries. The following 183 * replaces the original e820 map with a new one, removing overlaps, 184 * and resolving conflicting memory types in favor of highest 185 * numbered type. 
 *
 * The input parameter biosmap points to an array of 'struct
 * e820entry' which on entry has elements in the range [0, *pnr_map)
 * valid, and which has space for up to max_nr_map entries.
 * On return, the resulting sanitized e820 map entries will be
 * overwritten in the same location, starting at biosmap.
 *
 * The integer pointed to by pnr_map must be valid on entry (the
 * current number of valid entries located at biosmap). If the
 * sanitizing succeeds the *pnr_map will be updated with the new
 * number of valid entries (something no more than max_nr_map).
 *
 * The return value from sanitize_e820_map() is zero if it
 * successfully 'sanitized' the map entries passed in, and is -1
 * if it did nothing, which can happen if either of (1) it was
 * only passed one map entry, or (2) any of the input map entries
 * were invalid (start + size < start, meaning that the size was
 * so big the described memory range wrapped around through zero.)
 *
 *	Visually we're performing the following
 *	(1,2,3,4 = memory types)...
207 * 208 * Sample memory map (w/overlaps): 209 * ____22__________________ 210 * ______________________4_ 211 * ____1111________________ 212 * _44_____________________ 213 * 11111111________________ 214 * ____________________33__ 215 * ___________44___________ 216 * __________33333_________ 217 * ______________22________ 218 * ___________________2222_ 219 * _________111111111______ 220 * _____________________11_ 221 * _________________4______ 222 * 223 * Sanitized equivalent (no overlap): 224 * 1_______________________ 225 * _44_____________________ 226 * ___1____________________ 227 * ____22__________________ 228 * ______11________________ 229 * _________1______________ 230 * __________3_____________ 231 * ___________44___________ 232 * _____________33_________ 233 * _______________2________ 234 * ________________1_______ 235 * _________________4______ 236 * ___________________2____ 237 * ____________________33__ 238 * ______________________4_ 239 */ 240 struct change_member { 241 struct e820entry *pbios; /* pointer to original bios entry */ 242 unsigned long long addr; /* address for this change point */ 243 }; 244 245 static int __init cpcompare(const void *a, const void *b) 246 { 247 struct change_member * const *app = a, * const *bpp = b; 248 const struct change_member *ap = *app, *bp = *bpp; 249 250 /* 251 * Inputs are pointers to two elements of change_point[]. If their 252 * addresses are unequal, their difference dominates. If the addresses 253 * are equal, then consider one that represents the end of its region 254 * to be greater than one that does not. 255 */ 256 if (ap->addr != bp->addr) 257 return ap->addr > bp->addr ? 
1 : -1; 258 259 return (ap->addr != ap->pbios->addr) - (bp->addr != bp->pbios->addr); 260 } 261 262 int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, 263 u32 *pnr_map) 264 { 265 static struct change_member change_point_list[2*E820_X_MAX] __initdata; 266 static struct change_member *change_point[2*E820_X_MAX] __initdata; 267 static struct e820entry *overlap_list[E820_X_MAX] __initdata; 268 static struct e820entry new_bios[E820_X_MAX] __initdata; 269 unsigned long current_type, last_type; 270 unsigned long long last_addr; 271 int chgidx; 272 int overlap_entries; 273 int new_bios_entry; 274 int old_nr, new_nr, chg_nr; 275 int i; 276 277 /* if there's only one memory region, don't bother */ 278 if (*pnr_map < 2) 279 return -1; 280 281 old_nr = *pnr_map; 282 BUG_ON(old_nr > max_nr_map); 283 284 /* bail out if we find any unreasonable addresses in bios map */ 285 for (i = 0; i < old_nr; i++) 286 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) 287 return -1; 288 289 /* create pointers for initial change-point information (for sorting) */ 290 for (i = 0; i < 2 * old_nr; i++) 291 change_point[i] = &change_point_list[i]; 292 293 /* record all known change-points (starting and ending addresses), 294 omitting those that are for empty memory regions */ 295 chgidx = 0; 296 for (i = 0; i < old_nr; i++) { 297 if (biosmap[i].size != 0) { 298 change_point[chgidx]->addr = biosmap[i].addr; 299 change_point[chgidx++]->pbios = &biosmap[i]; 300 change_point[chgidx]->addr = biosmap[i].addr + 301 biosmap[i].size; 302 change_point[chgidx++]->pbios = &biosmap[i]; 303 } 304 } 305 chg_nr = chgidx; 306 307 /* sort change-point list by memory addresses (low -> high) */ 308 sort(change_point, chg_nr, sizeof *change_point, cpcompare, NULL); 309 310 /* create a new bios memory map, removing overlaps */ 311 overlap_entries = 0; /* number of entries in the overlap table */ 312 new_bios_entry = 0; /* index for creating new bios map entries */ 313 last_type = 0; /* start 
with undefined memory type */ 314 last_addr = 0; /* start with 0 as last starting address */ 315 316 /* loop through change-points, determining affect on the new bios map */ 317 for (chgidx = 0; chgidx < chg_nr; chgidx++) { 318 /* keep track of all overlapping bios entries */ 319 if (change_point[chgidx]->addr == 320 change_point[chgidx]->pbios->addr) { 321 /* 322 * add map entry to overlap list (> 1 entry 323 * implies an overlap) 324 */ 325 overlap_list[overlap_entries++] = 326 change_point[chgidx]->pbios; 327 } else { 328 /* 329 * remove entry from list (order independent, 330 * so swap with last) 331 */ 332 for (i = 0; i < overlap_entries; i++) { 333 if (overlap_list[i] == 334 change_point[chgidx]->pbios) 335 overlap_list[i] = 336 overlap_list[overlap_entries-1]; 337 } 338 overlap_entries--; 339 } 340 /* 341 * if there are overlapping entries, decide which 342 * "type" to use (larger value takes precedence -- 343 * 1=usable, 2,3,4,4+=unusable) 344 */ 345 current_type = 0; 346 for (i = 0; i < overlap_entries; i++) 347 if (overlap_list[i]->type > current_type) 348 current_type = overlap_list[i]->type; 349 /* 350 * continue building up new bios map based on this 351 * information 352 */ 353 if (current_type != last_type || current_type == E820_PRAM) { 354 if (last_type != 0) { 355 new_bios[new_bios_entry].size = 356 change_point[chgidx]->addr - last_addr; 357 /* 358 * move forward only if the new size 359 * was non-zero 360 */ 361 if (new_bios[new_bios_entry].size != 0) 362 /* 363 * no more space left for new 364 * bios entries ? 
365 */ 366 if (++new_bios_entry >= max_nr_map) 367 break; 368 } 369 if (current_type != 0) { 370 new_bios[new_bios_entry].addr = 371 change_point[chgidx]->addr; 372 new_bios[new_bios_entry].type = current_type; 373 last_addr = change_point[chgidx]->addr; 374 } 375 last_type = current_type; 376 } 377 } 378 /* retain count for new bios entries */ 379 new_nr = new_bios_entry; 380 381 /* copy new bios mapping into original location */ 382 memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); 383 *pnr_map = new_nr; 384 385 return 0; 386 } 387 388 static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) 389 { 390 while (nr_map) { 391 u64 start = biosmap->addr; 392 u64 size = biosmap->size; 393 u64 end = start + size - 1; 394 u32 type = biosmap->type; 395 396 /* Overflow in 64 bits? Ignore the memory map. */ 397 if (start > end && likely(size)) 398 return -1; 399 400 e820_add_region(start, size, type); 401 402 biosmap++; 403 nr_map--; 404 } 405 return 0; 406 } 407 408 /* 409 * Copy the BIOS e820 map into a safe place. 410 * 411 * Sanity-check it while we're at it.. 412 * 413 * If we're lucky and live on a modern system, the setup code 414 * will have given us a memory map that we can use to properly 415 * set up memory. If we aren't, we'll fake a memory map. 416 */ 417 static int __init append_e820_map(struct e820entry *biosmap, int nr_map) 418 { 419 /* Only one memory region (or negative)? 
Ignore it */ 420 if (nr_map < 2) 421 return -1; 422 423 return __append_e820_map(biosmap, nr_map); 424 } 425 426 static u64 __init __e820_update_range(struct e820map *e820x, u64 start, 427 u64 size, unsigned old_type, 428 unsigned new_type) 429 { 430 u64 end; 431 unsigned int i; 432 u64 real_updated_size = 0; 433 434 BUG_ON(old_type == new_type); 435 436 if (size > (ULLONG_MAX - start)) 437 size = ULLONG_MAX - start; 438 439 end = start + size; 440 printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ", 441 (unsigned long long) start, (unsigned long long) (end - 1)); 442 e820_print_type(old_type); 443 printk(KERN_CONT " ==> "); 444 e820_print_type(new_type); 445 printk(KERN_CONT "\n"); 446 447 for (i = 0; i < e820x->nr_map; i++) { 448 struct e820entry *ei = &e820x->map[i]; 449 u64 final_start, final_end; 450 u64 ei_end; 451 452 if (ei->type != old_type) 453 continue; 454 455 ei_end = ei->addr + ei->size; 456 /* totally covered by new range? */ 457 if (ei->addr >= start && ei_end <= end) { 458 ei->type = new_type; 459 real_updated_size += ei->size; 460 continue; 461 } 462 463 /* new range is totally covered? */ 464 if (ei->addr < start && ei_end > end) { 465 __e820_add_region(e820x, start, size, new_type); 466 __e820_add_region(e820x, end, ei_end - end, ei->type); 467 ei->size = start - ei->addr; 468 real_updated_size += size; 469 continue; 470 } 471 472 /* partially covered */ 473 final_start = max(start, ei->addr); 474 final_end = min(end, ei_end); 475 if (final_start >= final_end) 476 continue; 477 478 __e820_add_region(e820x, final_start, final_end - final_start, 479 new_type); 480 481 real_updated_size += final_end - final_start; 482 483 /* 484 * left range could be head or tail, so need to update 485 * size at first. 
486 */ 487 ei->size -= final_end - final_start; 488 if (ei->addr < final_start) 489 continue; 490 ei->addr = final_end; 491 } 492 return real_updated_size; 493 } 494 495 u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, 496 unsigned new_type) 497 { 498 return __e820_update_range(e820, start, size, old_type, new_type); 499 } 500 501 static u64 __init e820_update_range_saved(u64 start, u64 size, 502 unsigned old_type, unsigned new_type) 503 { 504 return __e820_update_range(e820_saved, start, size, old_type, 505 new_type); 506 } 507 508 /* make e820 not cover the range */ 509 u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, 510 int checktype) 511 { 512 int i; 513 u64 end; 514 u64 real_removed_size = 0; 515 516 if (size > (ULLONG_MAX - start)) 517 size = ULLONG_MAX - start; 518 519 end = start + size; 520 printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ", 521 (unsigned long long) start, (unsigned long long) (end - 1)); 522 if (checktype) 523 e820_print_type(old_type); 524 printk(KERN_CONT "\n"); 525 526 for (i = 0; i < e820->nr_map; i++) { 527 struct e820entry *ei = &e820->map[i]; 528 u64 final_start, final_end; 529 u64 ei_end; 530 531 if (checktype && ei->type != old_type) 532 continue; 533 534 ei_end = ei->addr + ei->size; 535 /* totally covered? */ 536 if (ei->addr >= start && ei_end <= end) { 537 real_removed_size += ei->size; 538 memset(ei, 0, sizeof(struct e820entry)); 539 continue; 540 } 541 542 /* new range is totally covered? */ 543 if (ei->addr < start && ei_end > end) { 544 e820_add_region(end, ei_end - end, ei->type); 545 ei->size = start - ei->addr; 546 real_removed_size += size; 547 continue; 548 } 549 550 /* partially covered */ 551 final_start = max(start, ei->addr); 552 final_end = min(end, ei_end); 553 if (final_start >= final_end) 554 continue; 555 real_removed_size += final_end - final_start; 556 557 /* 558 * left range could be head or tail, so need to update 559 * size at first. 
560 */ 561 ei->size -= final_end - final_start; 562 if (ei->addr < final_start) 563 continue; 564 ei->addr = final_end; 565 } 566 return real_removed_size; 567 } 568 569 void __init update_e820(void) 570 { 571 if (sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map)) 572 return; 573 printk(KERN_INFO "e820: modified physical RAM map:\n"); 574 e820_print_map("modified"); 575 } 576 static void __init update_e820_saved(void) 577 { 578 sanitize_e820_map(e820_saved->map, ARRAY_SIZE(e820_saved->map), 579 &e820_saved->nr_map); 580 } 581 #define MAX_GAP_END 0x100000000ull 582 /* 583 * Search for a gap in the e820 memory space from 0 to MAX_GAP_END. 584 */ 585 static int __init e820_search_gap(unsigned long *gapstart, 586 unsigned long *gapsize) 587 { 588 unsigned long long last = MAX_GAP_END; 589 int i = e820->nr_map; 590 int found = 0; 591 592 while (--i >= 0) { 593 unsigned long long start = e820->map[i].addr; 594 unsigned long long end = start + e820->map[i].size; 595 596 /* 597 * Since "last" is at most 4GB, we know we'll 598 * fit in 32 bits if this condition is true 599 */ 600 if (last > end) { 601 unsigned long gap = last - end; 602 603 if (gap >= *gapsize) { 604 *gapsize = gap; 605 *gapstart = end; 606 found = 1; 607 } 608 } 609 if (start < last) 610 last = start; 611 } 612 return found; 613 } 614 615 /* 616 * Search for the biggest gap in the low 32 bits of the e820 617 * memory space. We pass this space to PCI to assign MMIO resources 618 * for hotplug or unconfigured devices in. 619 * Hopefully the BIOS let enough space left. 
620 */ 621 __init void e820_setup_gap(void) 622 { 623 unsigned long gapstart, gapsize; 624 int found; 625 626 gapsize = 0x400000; 627 found = e820_search_gap(&gapstart, &gapsize); 628 629 if (!found) { 630 #ifdef CONFIG_X86_64 631 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; 632 printk(KERN_ERR 633 "e820: cannot find a gap in the 32bit address range\n" 634 "e820: PCI devices with unassigned 32bit BARs may break!\n"); 635 #else 636 gapstart = 0x10000000; 637 #endif 638 } 639 640 /* 641 * e820_reserve_resources_late protect stolen RAM already 642 */ 643 pci_mem_start = gapstart; 644 645 printk(KERN_INFO 646 "e820: [mem %#010lx-%#010lx] available for PCI devices\n", 647 gapstart, gapstart + gapsize - 1); 648 } 649 650 /* 651 * Called late during init, in free_initmem(). 652 * 653 * Initial e820 and e820_saved are largish __initdata arrays. 654 * Copy them to (usually much smaller) dynamically allocated area. 655 * This is done after all tweaks we ever do to them: 656 * all functions which modify them are __init functions, 657 * they won't exist after this point. 658 */ 659 __init void e820_reallocate_tables(void) 660 { 661 struct e820map *n; 662 int size; 663 664 size = offsetof(struct e820map, map) + sizeof(struct e820entry) * e820->nr_map; 665 n = kmalloc(size, GFP_KERNEL); 666 BUG_ON(!n); 667 memcpy(n, e820, size); 668 e820 = n; 669 670 size = offsetof(struct e820map, map) + sizeof(struct e820entry) * e820_saved->nr_map; 671 n = kmalloc(size, GFP_KERNEL); 672 BUG_ON(!n); 673 memcpy(n, e820_saved, size); 674 e820_saved = n; 675 } 676 677 /** 678 * Because of the size limitation of struct boot_params, only first 679 * 128 E820 memory entries are passed to kernel via 680 * boot_params.e820_map, others are passed via SETUP_E820_EXT node of 681 * linked list of struct setup_data, which is parsed here. 
682 */ 683 void __init parse_e820_ext(u64 phys_addr, u32 data_len) 684 { 685 int entries; 686 struct e820entry *extmap; 687 struct setup_data *sdata; 688 689 sdata = early_memremap(phys_addr, data_len); 690 entries = sdata->len / sizeof(struct e820entry); 691 extmap = (struct e820entry *)(sdata->data); 692 __append_e820_map(extmap, entries); 693 sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map); 694 early_memunmap(sdata, data_len); 695 printk(KERN_INFO "e820: extended physical RAM map:\n"); 696 e820_print_map("extended"); 697 } 698 699 #if defined(CONFIG_X86_64) || \ 700 (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) 701 /** 702 * Find the ranges of physical addresses that do not correspond to 703 * e820 RAM areas and mark the corresponding pages as nosave for 704 * hibernation (32 bit) or software suspend and suspend to RAM (64 bit). 705 * 706 * This function requires the e820 map to be sorted and without any 707 * overlapping entries. 708 */ 709 void __init e820_mark_nosave_regions(unsigned long limit_pfn) 710 { 711 int i; 712 unsigned long pfn = 0; 713 714 for (i = 0; i < e820->nr_map; i++) { 715 struct e820entry *ei = &e820->map[i]; 716 717 if (pfn < PFN_UP(ei->addr)) 718 register_nosave_region(pfn, PFN_UP(ei->addr)); 719 720 pfn = PFN_DOWN(ei->addr + ei->size); 721 722 if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) 723 register_nosave_region(PFN_UP(ei->addr), pfn); 724 725 if (pfn >= limit_pfn) 726 break; 727 } 728 } 729 #endif 730 731 #ifdef CONFIG_ACPI 732 /** 733 * Mark ACPI NVS memory region, so that we can save/restore it during 734 * hibernation and the subsequent resume. 
735 */ 736 static int __init e820_mark_nvs_memory(void) 737 { 738 int i; 739 740 for (i = 0; i < e820->nr_map; i++) { 741 struct e820entry *ei = &e820->map[i]; 742 743 if (ei->type == E820_NVS) 744 acpi_nvs_register(ei->addr, ei->size); 745 } 746 747 return 0; 748 } 749 core_initcall(e820_mark_nvs_memory); 750 #endif 751 752 /* 753 * pre allocated 4k and reserved it in memblock and e820_saved 754 */ 755 u64 __init early_reserve_e820(u64 size, u64 align) 756 { 757 u64 addr; 758 759 addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); 760 if (addr) { 761 e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); 762 printk(KERN_INFO "e820: update e820_saved for early_reserve_e820\n"); 763 update_e820_saved(); 764 } 765 766 return addr; 767 } 768 769 #ifdef CONFIG_X86_32 770 # ifdef CONFIG_X86_PAE 771 # define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT)) 772 # else 773 # define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT)) 774 # endif 775 #else /* CONFIG_X86_32 */ 776 # define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT 777 #endif 778 779 /* 780 * Find the highest page frame number we have available 781 */ 782 static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) 783 { 784 int i; 785 unsigned long last_pfn = 0; 786 unsigned long max_arch_pfn = MAX_ARCH_PFN; 787 788 for (i = 0; i < e820->nr_map; i++) { 789 struct e820entry *ei = &e820->map[i]; 790 unsigned long start_pfn; 791 unsigned long end_pfn; 792 793 if (ei->type != type) 794 continue; 795 796 start_pfn = ei->addr >> PAGE_SHIFT; 797 end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT; 798 799 if (start_pfn >= limit_pfn) 800 continue; 801 if (end_pfn > limit_pfn) { 802 last_pfn = limit_pfn; 803 break; 804 } 805 if (end_pfn > last_pfn) 806 last_pfn = end_pfn; 807 } 808 809 if (last_pfn > max_arch_pfn) 810 last_pfn = max_arch_pfn; 811 812 printk(KERN_INFO "e820: last_pfn = %#lx max_arch_pfn = %#lx\n", 813 last_pfn, max_arch_pfn); 814 return last_pfn; 815 } 816 unsigned long __init 
e820_end_of_ram_pfn(void) 817 { 818 return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); 819 } 820 821 unsigned long __init e820_end_of_low_ram_pfn(void) 822 { 823 return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_RAM); 824 } 825 826 static void __init early_panic(char *msg) 827 { 828 early_printk(msg); 829 panic(msg); 830 } 831 832 static int userdef __initdata; 833 834 /* "mem=nopentium" disables the 4MB page tables. */ 835 static int __init parse_memopt(char *p) 836 { 837 u64 mem_size; 838 839 if (!p) 840 return -EINVAL; 841 842 if (!strcmp(p, "nopentium")) { 843 #ifdef CONFIG_X86_32 844 setup_clear_cpu_cap(X86_FEATURE_PSE); 845 return 0; 846 #else 847 printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n"); 848 return -EINVAL; 849 #endif 850 } 851 852 userdef = 1; 853 mem_size = memparse(p, &p); 854 /* don't remove all of memory when handling "mem={invalid}" param */ 855 if (mem_size == 0) 856 return -EINVAL; 857 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); 858 859 return 0; 860 } 861 early_param("mem", parse_memopt); 862 863 static int __init parse_memmap_one(char *p) 864 { 865 char *oldp; 866 u64 start_at, mem_size; 867 868 if (!p) 869 return -EINVAL; 870 871 if (!strncmp(p, "exactmap", 8)) { 872 #ifdef CONFIG_CRASH_DUMP 873 /* 874 * If we are doing a crash dump, we still need to know 875 * the real mem size before original memory map is 876 * reset. 
877 */ 878 saved_max_pfn = e820_end_of_ram_pfn(); 879 #endif 880 e820->nr_map = 0; 881 userdef = 1; 882 return 0; 883 } 884 885 oldp = p; 886 mem_size = memparse(p, &p); 887 if (p == oldp) 888 return -EINVAL; 889 890 userdef = 1; 891 if (*p == '@') { 892 start_at = memparse(p+1, &p); 893 e820_add_region(start_at, mem_size, E820_RAM); 894 } else if (*p == '#') { 895 start_at = memparse(p+1, &p); 896 e820_add_region(start_at, mem_size, E820_ACPI); 897 } else if (*p == '$') { 898 start_at = memparse(p+1, &p); 899 e820_add_region(start_at, mem_size, E820_RESERVED); 900 } else if (*p == '!') { 901 start_at = memparse(p+1, &p); 902 e820_add_region(start_at, mem_size, E820_PRAM); 903 } else 904 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); 905 906 return *p == '\0' ? 0 : -EINVAL; 907 } 908 static int __init parse_memmap_opt(char *str) 909 { 910 while (str) { 911 char *k = strchr(str, ','); 912 913 if (k) 914 *k++ = 0; 915 916 parse_memmap_one(str); 917 str = k; 918 } 919 920 return 0; 921 } 922 early_param("memmap", parse_memmap_opt); 923 924 void __init finish_e820_parsing(void) 925 { 926 if (userdef) { 927 if (sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), 928 &e820->nr_map) < 0) 929 early_panic("Invalid user supplied memory map"); 930 931 printk(KERN_INFO "e820: user-defined physical RAM map:\n"); 932 e820_print_map("user"); 933 } 934 } 935 936 static const char *__init e820_type_to_string(int e820_type) 937 { 938 switch (e820_type) { 939 case E820_RESERVED_KERN: 940 case E820_RAM: return "System RAM"; 941 case E820_ACPI: return "ACPI Tables"; 942 case E820_NVS: return "ACPI Non-volatile Storage"; 943 case E820_UNUSABLE: return "Unusable memory"; 944 case E820_PRAM: return "Persistent Memory (legacy)"; 945 case E820_PMEM: return "Persistent Memory"; 946 default: return "reserved"; 947 } 948 } 949 950 static unsigned long __init e820_type_to_iomem_type(int e820_type) 951 { 952 switch (e820_type) { 953 case E820_RESERVED_KERN: 954 case E820_RAM: 
955 return IORESOURCE_SYSTEM_RAM; 956 case E820_ACPI: 957 case E820_NVS: 958 case E820_UNUSABLE: 959 case E820_PRAM: 960 case E820_PMEM: 961 default: 962 return IORESOURCE_MEM; 963 } 964 } 965 966 static unsigned long __init e820_type_to_iores_desc(int e820_type) 967 { 968 switch (e820_type) { 969 case E820_ACPI: 970 return IORES_DESC_ACPI_TABLES; 971 case E820_NVS: 972 return IORES_DESC_ACPI_NV_STORAGE; 973 case E820_PMEM: 974 return IORES_DESC_PERSISTENT_MEMORY; 975 case E820_PRAM: 976 return IORES_DESC_PERSISTENT_MEMORY_LEGACY; 977 case E820_RESERVED_KERN: 978 case E820_RAM: 979 case E820_UNUSABLE: 980 default: 981 return IORES_DESC_NONE; 982 } 983 } 984 985 static bool __init do_mark_busy(u32 type, struct resource *res) 986 { 987 /* this is the legacy bios/dos rom-shadow + mmio region */ 988 if (res->start < (1ULL<<20)) 989 return true; 990 991 /* 992 * Treat persistent memory like device memory, i.e. reserve it 993 * for exclusive use of a driver 994 */ 995 switch (type) { 996 case E820_RESERVED: 997 case E820_PRAM: 998 case E820_PMEM: 999 return false; 1000 default: 1001 return true; 1002 } 1003 } 1004 1005 /* 1006 * Mark e820 reserved areas as busy for the resource manager. 
1007 */ 1008 static struct resource __initdata *e820_res; 1009 void __init e820_reserve_resources(void) 1010 { 1011 int i; 1012 struct resource *res; 1013 u64 end; 1014 1015 res = alloc_bootmem(sizeof(struct resource) * e820->nr_map); 1016 e820_res = res; 1017 for (i = 0; i < e820->nr_map; i++) { 1018 end = e820->map[i].addr + e820->map[i].size - 1; 1019 if (end != (resource_size_t)end) { 1020 res++; 1021 continue; 1022 } 1023 res->name = e820_type_to_string(e820->map[i].type); 1024 res->start = e820->map[i].addr; 1025 res->end = end; 1026 1027 res->flags = e820_type_to_iomem_type(e820->map[i].type); 1028 res->desc = e820_type_to_iores_desc(e820->map[i].type); 1029 1030 /* 1031 * don't register the region that could be conflicted with 1032 * pci device BAR resource and insert them later in 1033 * pcibios_resource_survey() 1034 */ 1035 if (do_mark_busy(e820->map[i].type, res)) { 1036 res->flags |= IORESOURCE_BUSY; 1037 insert_resource(&iomem_resource, res); 1038 } 1039 res++; 1040 } 1041 1042 for (i = 0; i < e820_saved->nr_map; i++) { 1043 struct e820entry *entry = &e820_saved->map[i]; 1044 firmware_map_add_early(entry->addr, 1045 entry->addr + entry->size, 1046 e820_type_to_string(entry->type)); 1047 } 1048 } 1049 1050 /* How much should we pad RAM ending depending on where it is? 
*/ 1051 static unsigned long __init ram_alignment(resource_size_t pos) 1052 { 1053 unsigned long mb = pos >> 20; 1054 1055 /* To 64kB in the first megabyte */ 1056 if (!mb) 1057 return 64*1024; 1058 1059 /* To 1MB in the first 16MB */ 1060 if (mb < 16) 1061 return 1024*1024; 1062 1063 /* To 64MB for anything above that */ 1064 return 64*1024*1024; 1065 } 1066 1067 #define MAX_RESOURCE_SIZE ((resource_size_t)-1) 1068 1069 void __init e820_reserve_resources_late(void) 1070 { 1071 int i; 1072 struct resource *res; 1073 1074 res = e820_res; 1075 for (i = 0; i < e820->nr_map; i++) { 1076 if (!res->parent && res->end) 1077 insert_resource_expand_to_fit(&iomem_resource, res); 1078 res++; 1079 } 1080 1081 /* 1082 * Try to bump up RAM regions to reasonable boundaries to 1083 * avoid stolen RAM: 1084 */ 1085 for (i = 0; i < e820->nr_map; i++) { 1086 struct e820entry *entry = &e820->map[i]; 1087 u64 start, end; 1088 1089 if (entry->type != E820_RAM) 1090 continue; 1091 start = entry->addr + entry->size; 1092 end = round_up(start, ram_alignment(start)) - 1; 1093 if (end > MAX_RESOURCE_SIZE) 1094 end = MAX_RESOURCE_SIZE; 1095 if (start >= end) 1096 continue; 1097 printk(KERN_DEBUG 1098 "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n", 1099 start, end); 1100 reserve_region_with_split(&iomem_resource, start, end, 1101 "RAM buffer"); 1102 } 1103 } 1104 1105 char *__init default_machine_specific_memory_setup(void) 1106 { 1107 char *who = "BIOS-e820"; 1108 u32 new_nr; 1109 /* 1110 * Try to copy the BIOS-supplied E820-map. 
1111 * 1112 * Otherwise fake a memory map; one section from 0k->640k, 1113 * the next section from 1mb->appropriate_mem_k 1114 */ 1115 new_nr = boot_params.e820_entries; 1116 sanitize_e820_map(boot_params.e820_map, 1117 ARRAY_SIZE(boot_params.e820_map), 1118 &new_nr); 1119 boot_params.e820_entries = new_nr; 1120 if (append_e820_map(boot_params.e820_map, boot_params.e820_entries) 1121 < 0) { 1122 u64 mem_size; 1123 1124 /* compare results from other methods and take the greater */ 1125 if (boot_params.alt_mem_k 1126 < boot_params.screen_info.ext_mem_k) { 1127 mem_size = boot_params.screen_info.ext_mem_k; 1128 who = "BIOS-88"; 1129 } else { 1130 mem_size = boot_params.alt_mem_k; 1131 who = "BIOS-e801"; 1132 } 1133 1134 e820->nr_map = 0; 1135 e820_add_region(0, LOWMEMSIZE(), E820_RAM); 1136 e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); 1137 } 1138 1139 /* In case someone cares... */ 1140 return who; 1141 } 1142 1143 void __init setup_memory_map(void) 1144 { 1145 char *who; 1146 1147 who = x86_init.resources.memory_setup(); 1148 memcpy(e820_saved, e820, sizeof(struct e820map)); 1149 printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n"); 1150 e820_print_map(who); 1151 } 1152 1153 void __init memblock_x86_fill(void) 1154 { 1155 int i; 1156 u64 end; 1157 1158 /* 1159 * EFI may have more than 128 entries 1160 * We are safe to enable resizing, beause memblock_x86_fill() 1161 * is rather later for x86 1162 */ 1163 memblock_allow_resize(); 1164 1165 for (i = 0; i < e820->nr_map; i++) { 1166 struct e820entry *ei = &e820->map[i]; 1167 1168 end = ei->addr + ei->size; 1169 if (end != (resource_size_t)end) 1170 continue; 1171 1172 if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) 1173 continue; 1174 1175 memblock_add(ei->addr, ei->size); 1176 } 1177 1178 /* throw away partial pages */ 1179 memblock_trim_memory(PAGE_SIZE); 1180 1181 memblock_dump_all(); 1182 } 1183 1184 void __init memblock_find_dma_reserve(void) 1185 { 1186 #ifdef CONFIG_X86_64 1187 u64 
nr_pages = 0, nr_free_pages = 0; 1188 unsigned long start_pfn, end_pfn; 1189 phys_addr_t start, end; 1190 int i; 1191 u64 u; 1192 1193 /* 1194 * need to find out used area below MAX_DMA_PFN 1195 * need to use memblock to get free size in [0, MAX_DMA_PFN] 1196 * at first, and assume boot_mem will not take below MAX_DMA_PFN 1197 */ 1198 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { 1199 start_pfn = min(start_pfn, MAX_DMA_PFN); 1200 end_pfn = min(end_pfn, MAX_DMA_PFN); 1201 nr_pages += end_pfn - start_pfn; 1202 } 1203 1204 for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, 1205 NULL) { 1206 start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); 1207 end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); 1208 if (start_pfn < end_pfn) 1209 nr_free_pages += end_pfn - start_pfn; 1210 } 1211 1212 set_dma_reserve(nr_pages - nr_free_pages); 1213 #endif 1214 } 1215