// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2011
 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
 */
#include <linux/hugetlb.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/facility.h>
#include <asm/pgalloc.h>
#include <asm/kfence.h>
#include <asm/page.h>
#include <asm/asm.h>
#include <asm/set_memory.h>

static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
	asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
		     : [addr] "+a" (addr) : [skey] "d" (skey));
	return addr;
}

void __storage_key_init_range(unsigned long start, unsigned long end)
{
	unsigned long boundary, size;

	while (start < end) {
		if (MACHINE_HAS_EDAT1) {
			/* set storage keys for a 1MB frame */
			size = 1UL << 20;
			boundary = (start + size) & ~(size - 1);
			if (boundary <= end) {
				do {
					start = sske_frame(start, PAGE_DEFAULT_KEY);
				} while (start < boundary);
				continue;
			}
		}
		page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
		start += PAGE_SIZE;
	}
}

#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);

void arch_report_meminfo(struct seq_file *m)
{
	seq_printf(m, "DirectMap4k:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
	seq_printf(m, "DirectMap1M:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
	seq_printf(m, "DirectMap2G:    %8lu kB\n",
		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
}
#endif /* CONFIG_PROC_FS */

static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
		    unsigned long dtt)
{
	unsigned long *table, mask;

	mask = 0;
	if (MACHINE_HAS_EDAT2) {
		switch (dtt) {
		case CRDTE_DTT_REGION3:
			mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
			break;
		case CRDTE_DTT_SEGMENT:
			mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
			break;
		case CRDTE_DTT_PAGE:
			mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
			break;
		}
		table = (unsigned long *)((unsigned long)old & mask);
		crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val);
	} else if (MACHINE_HAS_IDTE) {
		cspg(old, *old, new);
	} else {
		csp((unsigned int *)old + 1, *old, new);
	}
}

static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	pte_t *ptep, new;

	if (flags == SET_MEMORY_4K)
		return 0;
	ptep = pte_offset_kernel(pmdp, addr);
	do {
		new = *ptep;
		if (pte_none(new))
			return -EINVAL;
		if (flags & SET_MEMORY_RO)
			new = pte_wrprotect(new);
		else if (flags & SET_MEMORY_RW)
			new = pte_mkwrite_novma(pte_mkdirty(new));
		if (flags & SET_MEMORY_NX)
			new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
		else if (flags & SET_MEMORY_X)
			new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
		if (flags & SET_MEMORY_INV) {
			new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
		} else if (flags & SET_MEMORY_DEF) {
			new = __pte(pte_val(new) & PAGE_MASK);
			new = set_pte_bit(new, PAGE_KERNEL);
			if (!MACHINE_HAS_NX)
				new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
		}
		pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
		ptep++;
		addr += PAGE_SIZE;
		cond_resched();
	} while (addr < end);
	return 0;
}
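
/*
 * Split a 1 MB segment-table leaf entry into a page table whose
 * PTRS_PER_PTE entries map the same range with equivalent protection
 * and no-exec attributes. The new page table is installed atomically
 * via pgt_set(), and the direct map counters are moved from 1M to 4K.
 */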
static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
{
	unsigned long pte_addr, prot;
	pte_t *pt_dir, *ptep;
	pmd_t new;
	int i, ro, nx;

	pt_dir = vmem_pte_alloc();
	if (!pt_dir)
		return -ENOMEM;
	pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
	ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
	nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
	prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
	if (!nx)
		prot &= ~_PAGE_NOEXEC;
	ptep = pt_dir;
	for (i = 0; i < PTRS_PER_PTE; i++) {
		set_pte(ptep, __pte(pte_addr | prot));
		pte_addr += PAGE_SIZE;
		ptep++;
	}
	new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
	update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
	update_page_count(PG_DIRECT_MAP_1M, -1);
	return 0;
}

static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
			    unsigned long flags)
{
	pmd_t new = *pmdp;

	if (flags & SET_MEMORY_RO)
		new = pmd_wrprotect(new);
	else if (flags & SET_MEMORY_RW)
		new = pmd_mkwrite_novma(pmd_mkdirty(new));
	if (flags & SET_MEMORY_NX)
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
	else if (flags & SET_MEMORY_X)
		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
	if (flags & SET_MEMORY_INV) {
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
	} else if (flags & SET_MEMORY_DEF) {
		new = __pmd(pmd_val(new) & PMD_MASK);
		new = set_pmd_bit(new, SEGMENT_KERNEL);
		if (!MACHINE_HAS_NX)
			new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
	}
	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
}

static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	int need_split;
	pmd_t *pmdp;
	int rc = 0;

	pmdp = pmd_offset(pudp, addr);
	do {
		if (pmd_none(*pmdp))
			return -EINVAL;
		next = pmd_addr_end(addr, end);
		if (pmd_leaf(*pmdp)) {
			need_split = !!(flags & SET_MEMORY_4K);
			need_split |= !!(addr & ~PMD_MASK);
			need_split |= !!(addr + PMD_SIZE > next);
			if (need_split) {
				rc = split_pmd_page(pmdp, addr);
				if (rc)
					return rc;
				continue;
			}
			modify_pmd_page(pmdp, addr, flags);
		} else {
			rc = walk_pte_level(pmdp, addr, next, flags);
			if (rc)
				return rc;
		}
		pmdp++;
		addr = next;
		cond_resched();
	} while (addr < end);
	return rc;
}

static int split_pud_page(pud_t *pudp, unsigned long addr)
{
	unsigned long pmd_addr, prot;
	pmd_t *pm_dir, *pmdp;
	pud_t new;
	int i, ro, nx;

	pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
	if (!pm_dir)
		return -ENOMEM;
	pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
	ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
	nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
	prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
	if (!nx)
		prot &= ~_SEGMENT_ENTRY_NOEXEC;
	pmdp = pm_dir;
	for (i = 0; i < PTRS_PER_PMD; i++) {
		set_pmd(pmdp, __pmd(pmd_addr | prot));
		pmd_addr += PMD_SIZE;
		pmdp++;
	}
	new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
	update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
	update_page_count(PG_DIRECT_MAP_2G, -1);
	return 0;
}
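
/*
 * Apply the requested SET_MEMORY_* flags to a 2 GB region-third-table
 * leaf entry in place, mirroring the per-PTE logic in walk_pte_level().
 * The updated entry is installed atomically via pgt_set().
 */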
static void modify_pud_page(pud_t *pudp, unsigned long addr,
			    unsigned long flags)
{
	pud_t new = *pudp;

	if (flags & SET_MEMORY_RO)
		new = pud_wrprotect(new);
	else if (flags & SET_MEMORY_RW)
		new = pud_mkwrite(pud_mkdirty(new));
	if (flags & SET_MEMORY_NX)
		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
	else if (flags & SET_MEMORY_X)
		new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
	if (flags & SET_MEMORY_INV) {
		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
	} else if (flags & SET_MEMORY_DEF) {
		new = __pud(pud_val(new) & PUD_MASK);
		new = set_pud_bit(new, REGION3_KERNEL);
		if (!MACHINE_HAS_NX)
			new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
	}
	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}

static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	int need_split;
	pud_t *pudp;
	int rc = 0;

	pudp = pud_offset(p4d, addr);
	do {
		if (pud_none(*pudp))
			return -EINVAL;
		next = pud_addr_end(addr, end);
		if (pud_leaf(*pudp)) {
			need_split = !!(flags & SET_MEMORY_4K);
			need_split |= !!(addr & ~PUD_MASK);
			need_split |= !!(addr + PUD_SIZE > next);
			if (need_split) {
				rc = split_pud_page(pudp, addr);
				if (rc)
					break;
				continue;
			}
			modify_pud_page(pudp, addr, flags);
		} else {
			rc = walk_pmd_level(pudp, addr, next, flags);
		}
		pudp++;
		addr = next;
		cond_resched();
	} while (addr < end && !rc);
	return rc;
}

static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
			  unsigned long flags)
{
	unsigned long next;
	p4d_t *p4dp;
	int rc = 0;

	p4dp = p4d_offset(pgd, addr);
	do {
		if (p4d_none(*p4dp))
			return -EINVAL;
		next = p4d_addr_end(addr, end);
		rc = walk_pud_level(p4dp, addr, next, flags);
		p4dp++;
		addr = next;
		cond_resched();
	} while (addr < end && !rc);
	return rc;
}

DEFINE_MUTEX(cpa_mutex);

static int change_page_attr(unsigned long addr, unsigned long end,
			    unsigned long flags)
{
	unsigned long next;
	int rc = -EINVAL;
	pgd_t *pgdp;

	pgdp = pgd_offset_k(addr);
	do {
		if (pgd_none(*pgdp))
			break;
		next = pgd_addr_end(addr, end);
		rc = walk_p4d_level(pgdp, addr, next, flags);
		if (rc)
			break;
		cond_resched();
	} while (pgdp++, addr = next, addr < end && !rc);
	return rc;
}

static int change_page_attr_alias(unsigned long addr, unsigned long end,
				  unsigned long flags)
{
	unsigned long alias, offset, va_start, va_end;
	struct vm_struct *area;
	int rc = 0;

	/*
	 * Changes to read-only permissions on kernel VA mappings are also
	 * applied to the kernel direct mapping. Execute permissions are
	 * intentionally not transferred to keep all allocated pages within
	 * the direct mapping non-executable.
	 */
	flags &= SET_MEMORY_RO | SET_MEMORY_RW;
	if (!flags)
		return 0;
	area = NULL;
	while (addr < end) {
		if (!area)
			area = find_vm_area((void *)addr);
		if (!area || !(area->flags & VM_ALLOC))
			return 0;
		va_start = (unsigned long)area->addr;
		va_end = va_start + area->nr_pages * PAGE_SIZE;
		offset = (addr - va_start) >> PAGE_SHIFT;
		alias = (unsigned long)page_address(area->pages[offset]);
		rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
		if (rc)
			break;
		addr += PAGE_SIZE;
		if (addr >= va_end)
			area = NULL;
	}
	return rc;
}
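
/*
 * Common entry point for the set_memory_*() helpers: mask out NX/X
 * requests on machines without the NX facility, walk the kernel page
 * tables under cpa_mutex, and propagate RO/RW changes to the direct
 * mapping alias of vmalloc'ed ranges.
 */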
353 */ 354 flags &= SET_MEMORY_RO | SET_MEMORY_RW; 355 if (!flags) 356 return 0; 357 area = NULL; 358 while (addr < end) { 359 if (!area) 360 area = find_vm_area((void *)addr); 361 if (!area || !(area->flags & VM_ALLOC)) 362 return 0; 363 va_start = (unsigned long)area->addr; 364 va_end = va_start + area->nr_pages * PAGE_SIZE; 365 offset = (addr - va_start) >> PAGE_SHIFT; 366 alias = (unsigned long)page_address(area->pages[offset]); 367 rc = change_page_attr(alias, alias + PAGE_SIZE, flags); 368 if (rc) 369 break; 370 addr += PAGE_SIZE; 371 if (addr >= va_end) 372 area = NULL; 373 } 374 return rc; 375 } 376 377 int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags) 378 { 379 unsigned long end; 380 int rc; 381 382 if (!MACHINE_HAS_NX) 383 flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); 384 if (!flags) 385 return 0; 386 if (!numpages) 387 return 0; 388 addr &= PAGE_MASK; 389 end = addr + numpages * PAGE_SIZE; 390 mutex_lock(&cpa_mutex); 391 rc = change_page_attr(addr, end, flags); 392 if (rc) 393 goto out; 394 rc = change_page_attr_alias(addr, end, flags); 395 out: 396 mutex_unlock(&cpa_mutex); 397 return rc; 398 } 399 400 int set_direct_map_invalid_noflush(struct page *page) 401 { 402 return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV); 403 } 404 405 int set_direct_map_default_noflush(struct page *page) 406 { 407 return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF); 408 } 409 410 int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) 411 { 412 unsigned long flags; 413 414 if (valid) 415 flags = SET_MEMORY_DEF; 416 else 417 flags = SET_MEMORY_INV; 418 419 return __set_memory((unsigned long)page_to_virt(page), nr, flags); 420 } 421 422 bool kernel_page_present(struct page *page) 423 { 424 unsigned long addr; 425 unsigned int cc; 426 427 addr = (unsigned long)page_address(page); 428 asm volatile( 429 " lra %[addr],0(%[addr])\n" 430 CC_IPM(cc) 431 : CC_OUT(cc, cc), [addr] "+a" (addr) 432 : 433 : CC_CLOBBER); 434 return CC_TRANSFORM(cc) == 0; 435 } 436 437 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) 438 439 static void ipte_range(pte_t *pte, unsigned long address, int nr) 440 { 441 int i; 442 443 if (test_facility(13)) { 444 __ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL); 445 return; 446 } 447 for (i = 0; i < nr; i++) { 448 __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL); 449 address += PAGE_SIZE; 450 pte++; 451 } 452 } 453 454 void __kernel_map_pages(struct page *page, int numpages, int enable) 455 { 456 unsigned long address; 457 pte_t *ptep, pte; 458 int nr, i, j; 459 460 for (i = 0; i < numpages;) { 461 address = (unsigned long)page_to_virt(page + i); 462 ptep = virt_to_kpte(address); 463 nr = (unsigned long)ptep >> ilog2(sizeof(long)); 464 nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); 465 nr = min(numpages - i, nr); 466 if (enable) { 467 for (j = 0; j < nr; j++) { 468 pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID)); 469 set_pte(ptep, pte); 470 address += PAGE_SIZE; 471 ptep++; 472 } 473 } else { 474 ipte_range(ptep, address, nr); 475 } 476 i += nr; 477 } 478 } 479 480 #endif /* CONFIG_DEBUG_PAGEALLOC */ 481