// SPDX-License-Identifier: GPL-2.0
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/mem_detect.h>
#include <asm/maccess.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"

#define init_mm		(*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir	vmlinux.swapper_pg_dir_off
#define invalid_pg_dir	vmlinux.invalid_pg_dir_off

/*
 * Mimic virt_to_kpte() in the absence of the init_mm symbol.
 * Skip the pmd NULL check though.
 */
static inline pte_t *__virt_to_kpte(unsigned long va)
{
	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
}

unsigned long __bootdata_preserved(s390_invalid_asce);
unsigned long __bootdata(pgalloc_pos);
unsigned long __bootdata(pgalloc_end);
unsigned long __bootdata(pgalloc_low);

enum populate_mode {
	POPULATE_NONE,
	POPULATE_ONE2ONE,
	POPULATE_ABS_LOWCORE,
};

static void boot_check_oom(void)
{
	if (pgalloc_pos < pgalloc_low)
		error("out of memory on boot\n");
}

/*
 * Set up the boot-time page table allocator: everything between the end
 * of the kernel image (and the initrd, if present) and the end of the
 * detected memory may be used. Tables are carved out top down.
 */
static void pgtable_populate_init(void)
{
	unsigned long initrd_end;
	unsigned long kernel_end;

	kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
	pgalloc_low = round_up(kernel_end, PAGE_SIZE);
	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
		initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
		pgalloc_low = max(pgalloc_low, initrd_end);
	}

	pgalloc_end = round_down(get_mem_detect_end(), PAGE_SIZE);
	pgalloc_pos = pgalloc_end;

	boot_check_oom();
}

static void *boot_alloc_pages(unsigned int order)
{
	unsigned long size = PAGE_SIZE << order;

	pgalloc_pos -= size;
	pgalloc_pos = round_down(pgalloc_pos, size);

	boot_check_oom();

	return (void *)pgalloc_pos;
}

static void *boot_crst_alloc(unsigned long val)
{
	unsigned long *table;

	table = boot_alloc_pages(CRST_ALLOC_ORDER);
	if (table)
		crst_table_init(table, val);
	return table;
}

/*
 * A 2K page table fills only half of a 4K page: hand out the upper half
 * of a freshly allocated page first and remember the lower half for the
 * next call.
 */
static pte_t *boot_pte_alloc(void)
{
	static void *pte_leftover;
	pte_t *pte;

	BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);

	if (!pte_leftover) {
		pte_leftover = boot_alloc_pages(0);
		pte = pte_leftover + _PAGE_TABLE_SIZE;
	} else {
		pte = pte_leftover;
		pte_leftover = NULL;
	}
	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

static unsigned long _pa(unsigned long addr, enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_NONE:
		return -1;
	case POPULATE_ONE2ONE:
		return addr;
	case POPULATE_ABS_LOWCORE:
		return __abs_lowcore_pa(addr);
	default:
		return -1;
	}
}

static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
{
	return machine.has_edat2 &&
	       IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
}

static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
{
	return machine.has_edat1 &&
	       IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
}

static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	pte_t *pte, entry;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (pte_none(*pte)) {
			entry = __pte(_pa(addr, mode));
			entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
			set_pte(pte, entry);
		}
	}
}
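
/*
 * Populate the segment (pmd) level: map 1 MB segments directly where
 * EDAT1 is available, otherwise allocate a page table and fall through
 * to the page (pte) level.
 */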
static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	pmd_t *pmd, entry;
	pte_t *pte;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (can_large_pmd(pmd, addr, next)) {
				entry = __pmd(_pa(addr, mode));
				entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
				set_pmd(pmd, entry);
				continue;
			}
			pte = boot_pte_alloc();
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_large(*pmd)) {
			continue;
		}
		pgtable_pte_populate(pmd, addr, next, mode);
	}
}

static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	pud_t *pud, entry;
	pmd_t *pmd;

	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (can_large_pud(pud, addr, next)) {
				entry = __pud(_pa(addr, mode));
				entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
				set_pud(pud, entry);
				continue;
			}
			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_large(*pud)) {
			continue;
		}
		pgtable_pmd_populate(pud, addr, next, mode);
	}
}

static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (p4d_none(*p4d)) {
			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
			p4d_populate(&init_mm, p4d, pud);
		}
		pgtable_pud_populate(p4d, addr, next, mode);
	}
}

static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
{
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;

	pgd = pgd_offset(&init_mm, addr);
	for (; addr < end; addr = next, pgd++) {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgd)) {
			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
			pgd_populate(&init_mm, pgd, p4d);
		}
		pgtable_p4d_populate(pgd, addr, next, mode);
	}
}
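
/*
 * Build the boot page tables: identity-map all usable memory, map the
 * absolute lowcore and the memcpy_real() window, then load the
 * resulting ASCEs into control registers 1, 7 and 13.
 */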
void setup_vmem(unsigned long asce_limit)
{
	unsigned long start, end;
	unsigned long asce_type;
	unsigned long asce_bits;
	int i;

	if (asce_limit == _REGION1_SIZE) {
		asce_type = _REGION2_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		asce_type = _REGION3_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	}
	s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;

	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);

	/*
	 * To allow prefixing, the lowcore must be mapped with 4KB pages.
	 * To prevent creation of a large page at address 0, first map
	 * the lowcore and create the identity mapping only afterwards.
	 */
	pgtable_populate_init();
	pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
	for_each_mem_detect_usable_block(i, &start, &end)
		pgtable_populate(start, end, POPULATE_ONE2ONE);
	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
			 POPULATE_ABS_LOWCORE);
	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
			 POPULATE_NONE);
	memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);

	S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits;
	S390_lowcore.user_asce = s390_invalid_asce;

	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
	__ctl_load(S390_lowcore.user_asce, 7, 7);
	__ctl_load(S390_lowcore.kernel_asce, 13, 13);

	init_mm.context.asce = S390_lowcore.kernel_asce;
}

/*
 * Estimate the memory needed for page tables: one 2K page table maps
 * 256 (_PAGE_ENTRIES) pages; the result is doubled as a safety margin.
 */
unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total)
{
	unsigned long pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE);

	return DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2;
}