// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2011
 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
 */
#include <linux/cpufeature.h>
#include <linux/hugetlb.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/facility.h>
#include <asm/pgalloc.h>
#include <asm/kfence.h>
#include <asm/page.h>
#include <asm/asm.h>
#include <asm/set_memory.h>

void __storage_key_init_range(unsigned long start, unsigned long end)
{
	unsigned long boundary, size;

	while (start < end) {
		if (cpu_has_edat1()) {
			/* set storage keys for a 1MB frame */
			size = 1UL << 20;
			boundary = (start + size) & ~(size - 1);
			if (boundary <= end) {
				do {
					start = sske_frame(start, PAGE_DEFAULT_KEY);
				} while (start < boundary);
				continue;
			}
		}
		page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
		start += PAGE_SIZE;
	}
}

#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);

void arch_report_meminfo(struct seq_file *m)
{
	seq_printf(m, "DirectMap4k:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
	seq_printf(m, "DirectMap1M:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
	seq_printf(m, "DirectMap2G:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
}
#endif /* CONFIG_PROC_FS */

static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
		    unsigned long dtt)
{
	unsigned long *table, mask;

	mask = 0;
	if (cpu_has_edat2()) {
		switch (dtt) {
		case CRDTE_DTT_REGION3:
			mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
			break;
		case CRDTE_DTT_SEGMENT:
			mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
			break;
		case CRDTE_DTT_PAGE:
			mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
			break;
		}
		table = (unsigned long *)((unsigned long)old & mask);
		crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val);
	} else {
		cspg(old, *old, new);
	}
}

static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	pte_t *ptep, new;

	if (flags == SET_MEMORY_4K)
		return 0;
	ptep = pte_offset_kernel(pmdp, addr);
	do {
		new = *ptep;
		if (pte_none(new))
			return -EINVAL;
		if (flags & SET_MEMORY_RO)
			new = pte_wrprotect(new);
		else if (flags & SET_MEMORY_RW)
			new = pte_mkwrite_novma(pte_mkdirty(new));
		if (flags & SET_MEMORY_NX)
			new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
		else if (flags & SET_MEMORY_X)
			new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
		if (flags & SET_MEMORY_INV) {
			new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
		} else if (flags & SET_MEMORY_DEF) {
			new = __pte(pte_val(new) & PAGE_MASK);
			new = set_pte_bit(new, PAGE_KERNEL);
		}
		pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
		ptep++;
		addr += PAGE_SIZE;
		cond_resched();
	} while (addr < end);
	return 0;
}

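/*
 * Split a 1M segment mapping into 4K page mappings: allocate a page
 * table, replicate the protection and no-exec bits of the large pmd
 * in each pte, then exchange the pmd entry atomically via pgt_set().
 * The direct mapping counters are adjusted accordingly.
 */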
static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
{
	unsigned long pte_addr, prot;
	pte_t *pt_dir, *ptep;
	pmd_t new;
	int i, ro, nx;

	pt_dir = vmem_pte_alloc();
	if (!pt_dir)
		return -ENOMEM;
	pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
	ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
	nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
	prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
	if (!nx)
		prot &= ~_PAGE_NOEXEC;
	ptep = pt_dir;
	for (i = 0; i < PTRS_PER_PTE; i++) {
		set_pte(ptep, __pte(pte_addr | prot));
		pte_addr += PAGE_SIZE;
		ptep++;
	}
	new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
	update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
	update_page_count(PG_DIRECT_MAP_1M, -1);
	return 0;
}

static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
			    unsigned long flags)
{
	pmd_t new = *pmdp;

	if (flags & SET_MEMORY_RO)
		new = pmd_wrprotect(new);
	else if (flags & SET_MEMORY_RW)
		new = pmd_mkwrite_novma(pmd_mkdirty(new));
	if (flags & SET_MEMORY_NX)
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
	else if (flags & SET_MEMORY_X)
		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
	if (flags & SET_MEMORY_INV) {
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
	} else if (flags & SET_MEMORY_DEF) {
		new = __pmd(pmd_val(new) & PMD_MASK);
		new = set_pmd_bit(new, SEGMENT_KERNEL);
	}
	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
}

static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	int need_split;
	pmd_t *pmdp;
	int rc = 0;

	pmdp = pmd_offset(pudp, addr);
	do {
		if (pmd_none(*pmdp))
			return -EINVAL;
		next = pmd_addr_end(addr, end);
		if (pmd_leaf(*pmdp)) {
			need_split  = !!(flags & SET_MEMORY_4K);
			need_split |= !!(addr & ~PMD_MASK);
			need_split |= !!(addr + PMD_SIZE > next);
			if (need_split) {
				rc = split_pmd_page(pmdp, addr);
				if (rc)
					return rc;
				continue;
			}
			modify_pmd_page(pmdp, addr, flags);
		} else {
			rc = walk_pte_level(pmdp, addr, next, flags);
			if (rc)
				return rc;
		}
		pmdp++;
		addr = next;
		cond_resched();
	} while (addr < end);
	return rc;
}

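/*
 * Split a 2G region mapping into 1M segment mappings, analogous to
 * split_pmd_page(): allocate a segment table, replicate the protection
 * and no-exec bits of the large pud in each pmd, then exchange the pud
 * entry atomically via pgt_set().
 */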
static int split_pud_page(pud_t *pudp, unsigned long addr)
{
	unsigned long pmd_addr, prot;
	pmd_t *pm_dir, *pmdp;
	pud_t new;
	int i, ro, nx;

	pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
	if (!pm_dir)
		return -ENOMEM;
	pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
	ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
	nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
	prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
	if (!nx)
		prot &= ~_SEGMENT_ENTRY_NOEXEC;
	pmdp = pm_dir;
	for (i = 0; i < PTRS_PER_PMD; i++) {
		set_pmd(pmdp, __pmd(pmd_addr | prot));
		pmd_addr += PMD_SIZE;
		pmdp++;
	}
	new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
	update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
	update_page_count(PG_DIRECT_MAP_2G, -1);
	return 0;
}

static void modify_pud_page(pud_t *pudp, unsigned long addr,
			    unsigned long flags)
{
	pud_t new = *pudp;

	if (flags & SET_MEMORY_RO)
		new = pud_wrprotect(new);
	else if (flags & SET_MEMORY_RW)
		new = pud_mkwrite(pud_mkdirty(new));
	if (flags & SET_MEMORY_NX)
		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
	else if (flags & SET_MEMORY_X)
		new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
	if (flags & SET_MEMORY_INV) {
		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
	} else if (flags & SET_MEMORY_DEF) {
		new = __pud(pud_val(new) & PUD_MASK);
		new = set_pud_bit(new, REGION3_KERNEL);
	}
	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}

static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	int need_split;
	pud_t *pudp;
	int rc = 0;

	pudp = pud_offset(p4d, addr);
	do {
		if (pud_none(*pudp))
			return -EINVAL;
		next = pud_addr_end(addr, end);
		if (pud_leaf(*pudp)) {
			need_split  = !!(flags & SET_MEMORY_4K);
			need_split |= !!(addr & ~PUD_MASK);
			need_split |= !!(addr + PUD_SIZE > next);
			if (need_split) {
				rc = split_pud_page(pudp, addr);
				if (rc)
					break;
				continue;
			}
			modify_pud_page(pudp, addr, flags);
		} else {
			rc = walk_pmd_level(pudp, addr, next, flags);
		}
		pudp++;
		addr = next;
		cond_resched();
	} while (addr < end && !rc);
	return rc;
}

static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	p4d_t *p4dp;
	int rc = 0;

	p4dp = p4d_offset(pgd, addr);
	do {
		if (p4d_none(*p4dp))
			return -EINVAL;
		next = p4d_addr_end(addr, end);
		rc = walk_pud_level(p4dp, addr, next, flags);
		p4dp++;
		addr = next;
		cond_resched();
	} while (addr < end && !rc);
	return rc;
}

static DEFINE_MUTEX(cpa_mutex);

static int change_page_attr(unsigned long addr, unsigned long end,
			    unsigned long flags)
{
	unsigned long next;
	int rc = -EINVAL;
	pgd_t *pgdp;

	pgdp = pgd_offset_k(addr);
	do {
		if (pgd_none(*pgdp))
			break;
		next = pgd_addr_end(addr, end);
		rc = walk_p4d_level(pgdp, addr, next, flags);
		if (rc)
			break;
		cond_resched();
	} while (pgdp++, addr = next, addr < end && !rc);
	return rc;
}

static int change_page_attr_alias(unsigned long addr, unsigned long end,
				  unsigned long flags)
{
	unsigned long alias, offset, va_start, va_end;
	struct vm_struct *area;
	int rc = 0;

	/*
	 * Changes to read-only permissions on kernel VA mappings are also
	 * applied to the kernel direct mapping. Execute permissions are
	 * intentionally not transferred to keep all allocated pages within
	 * the direct mapping non-executable.
	 */
	flags &= SET_MEMORY_RO | SET_MEMORY_RW;
	if (!flags)
		return 0;
	area = NULL;
	while (addr < end) {
		if (!area)
			area = find_vm_area((void *)addr);
		if (!area || !(area->flags & VM_ALLOC))
			return 0;
		va_start = (unsigned long)area->addr;
		va_end = va_start + area->nr_pages * PAGE_SIZE;
		offset = (addr - va_start) >> PAGE_SHIFT;
		alias = (unsigned long)page_address(area->pages[offset]);
		rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
		if (rc)
			break;
		addr += PAGE_SIZE;
		if (addr >= va_end)
			area = NULL;
	}
	return rc;
}

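/*
 * Apply the protection changes described by flags to the range starting
 * at addr. NX/X requests are dropped when the machine has no no-execute
 * facility. All attribute changes are serialized by cpa_mutex, so that
 * concurrent split and modify operations cannot race with each other.
 */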
int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
{
	unsigned long end;
	int rc;

	if (!cpu_has_nx())
		flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
	if (!flags)
		return 0;
	if (!numpages)
		return 0;
	addr &= PAGE_MASK;
	end = addr + numpages * PAGE_SIZE;
	mutex_lock(&cpa_mutex);
	rc = change_page_attr(addr, end, flags);
	if (rc)
		goto out;
	rc = change_page_attr_alias(addr, end, flags);
out:
	mutex_unlock(&cpa_mutex);
	return rc;
}

int set_direct_map_invalid_noflush(struct page *page)
{
	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
}

int set_direct_map_default_noflush(struct page *page)
{
	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
}

int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
{
	unsigned long flags;

	if (valid)
		flags = SET_MEMORY_DEF;
	else
		flags = SET_MEMORY_INV;

	return __set_memory((unsigned long)page_to_virt(page), nr, flags);
}

bool kernel_page_present(struct page *page)
{
	unsigned long addr;
	unsigned int cc;

	addr = (unsigned long)page_address(page);
	asm volatile(
		"	lra	%[addr],0(%[addr])\n"
		CC_IPM(cc)
		: CC_OUT(cc, cc), [addr] "+a" (addr)
		:
		: CC_CLOBBER);
	return CC_TRANSFORM(cc) == 0;
}

#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)

static void ipte_range(pte_t *pte, unsigned long address, int nr)
{
	int i;

	if (test_facility(13)) {
		__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
		return;
	}
	for (i = 0; i < nr; i++) {
		__ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
		address += PAGE_SIZE;
		pte++;
	}
}

void __kernel_map_pages(struct page *page, int numpages, int enable)
{
	unsigned long address;
	pte_t *ptep, pte;
	int nr, i, j;

	for (i = 0; i < numpages;) {
		address = (unsigned long)page_to_virt(page + i);
		ptep = virt_to_kpte(address);
		/* number of pte entries left in this page table */
		nr = (unsigned long)ptep >> ilog2(sizeof(long));
		nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
		nr = min(numpages - i, nr);
		if (enable) {
			for (j = 0; j < nr; j++) {
				pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
				set_pte(ptep, pte);
				address += PAGE_SIZE;
				ptep++;
			}
		} else {
			ipte_range(ptep, address, nr);
		}
		i += nr;
	}
}

#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */
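
/*
 * Illustrative usage sketch, not part of this file: callers normally
 * reach __set_memory() through the set_memory_*() helpers declared in
 * <asm/set_memory.h>. Assuming the usual wrapper names, write-protecting
 * and later restoring a four-page kernel range might look like:
 *
 *	unsigned long addr = (unsigned long)buf;
 *	int rc;
 *
 *	rc = set_memory_ro(addr, 4);		(SET_MEMORY_RO)
 *	if (!rc)
 *		rc = set_memory_rw(addr, 4);	(SET_MEMORY_RW)
 */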