// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDSO implementations.
 *
 * Copyright (C) 2012 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */

#include <linux/cache.h>
#include <linux/clocksource.h>
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/time_namespace.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>

#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>

enum vdso_abi {
	VDSO_ABI_AA64,
	VDSO_ABI_AA32,
};

enum vvar_pages {
	VVAR_DATA_PAGE_OFFSET,
	VVAR_TIMENS_PAGE_OFFSET,
	VVAR_NR_PAGES,
};

struct vdso_abi_info {
	const char *name;
	const char *vdso_code_start;
	const char *vdso_code_end;
	unsigned long vdso_pages;
	/* Data Mapping */
	struct vm_special_mapping *dm;
	/* Code Mapping */
	struct vm_special_mapping *cm;
};

static struct vdso_abi_info vdso_info[] __ro_after_init = {
	[VDSO_ABI_AA64] = {
		.name = "vdso",
		.vdso_code_start = vdso_start,
		.vdso_code_end = vdso_end,
	},
#ifdef CONFIG_COMPAT_VDSO
	[VDSO_ABI_AA32] = {
		.name = "vdso32",
		.vdso_code_start = vdso32_start,
		.vdso_code_end = vdso32_end,
	},
#endif /* CONFIG_COMPAT_VDSO */
};

/*
 * The vDSO data page.
 */
static union vdso_data_store vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;

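/*
 * Userspace may move the vDSO mapping with mremap() (e.g. when a
 * checkpoint/restore tool relocates it); the mremap hook below keeps
 * mm->context.vdso pointing at the new base so that signal delivery can
 * still find the vDSO sigreturn trampoline.
 */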
static int vdso_mremap(const struct vm_special_mapping *sm,
		struct vm_area_struct *new_vma)
{
	current->mm->context.vdso = (void *)new_vma->vm_start;

	return 0;
}

static int __init __vdso_init(enum vdso_abi abi)
{
	int i;
	struct page **vdso_pagelist;
	unsigned long pfn;

	if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) {
		pr_err("vDSO is not a valid ELF object!\n");
		return -EINVAL;
	}

	vdso_info[abi].vdso_pages = (
		vdso_info[abi].vdso_code_end -
		vdso_info[abi].vdso_code_start) >>
		PAGE_SHIFT;

	vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
				sizeof(struct page *),
				GFP_KERNEL);
	if (vdso_pagelist == NULL)
		return -ENOMEM;

	/* Grab the vDSO code pages. */
	pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);

	for (i = 0; i < vdso_info[abi].vdso_pages; i++)
		vdso_pagelist[i] = pfn_to_page(pfn + i);

	vdso_info[abi].cm->pages = vdso_pagelist;

	return 0;
}

#ifdef CONFIG_TIME_NS
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)(vvar_page);
}

/*
 * The vvar mapping contains data for a specific time namespace, so when a task
 * changes namespace we must unmap its vvar data for the old namespace.
 * Subsequent faults will map in data for the new namespace.
 *
 * For more details see timens_setup_vdso_data().
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_read_lock(mm);

	for_each_vma(vmi, vma) {
		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
			zap_vma_pages(vma);
#ifdef CONFIG_COMPAT_VDSO
		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
			zap_vma_pages(vma);
#endif
	}

	mmap_read_unlock(mm);
	return 0;
}
#endif

static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
			     struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *timens_page = find_timens_vvar_page(vma);
	unsigned long pfn;

	switch (vmf->pgoff) {
	case VVAR_DATA_PAGE_OFFSET:
		if (timens_page)
			pfn = page_to_pfn(timens_page);
		else
			pfn = sym_to_pfn(vdso_data);
		break;
#ifdef CONFIG_TIME_NS
	case VVAR_TIMENS_PAGE_OFFSET:
		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (!timens_page)
			return VM_FAULT_SIGBUS;
		pfn = sym_to_pfn(vdso_data);
		break;
#endif /* CONFIG_TIME_NS */
	default:
		return VM_FAULT_SIGBUS;
	}

	return vmf_insert_pfn(vma, vmf->address, pfn);
}

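/*
 * The vvar region (VVAR_NR_PAGES pages) is mapped first, followed by the
 * vDSO text:
 *
 *   [ vvar data ][ vvar timens ][ vDSO code ... ]
 *
 * mm->context.vdso tracks the start of the code mapping, which is the
 * address advertised to userspace via AT_SYSINFO_EHDR.
 */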
static int __setup_additional_pages(enum vdso_abi abi,
				    struct mm_struct *mm,
				    struct linux_binprm *bprm,
				    int uses_interp)
{
	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
	unsigned long gp_flags = 0;
	void *ret;

	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);

	vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
	/* Be sure to map the data page */
	vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;

	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
	if (IS_ERR_VALUE(vdso_base)) {
		ret = ERR_PTR(vdso_base);
		goto up_fail;
	}

	ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
				       VM_READ|VM_MAYREAD|VM_PFNMAP,
				       vdso_info[abi].dm);
	if (IS_ERR(ret))
		goto up_fail;

	if (system_supports_bti_kernel())
		gp_flags = VM_ARM64_BTI;

	vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
	mm->context.vdso = (void *)vdso_base;
	ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
				       VM_READ|VM_EXEC|gp_flags|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       vdso_info[abi].cm);
	if (IS_ERR(ret))
		goto up_fail;

	return 0;

up_fail:
	mm->context.vdso = NULL;
	return PTR_ERR(ret);
}

#ifdef CONFIG_COMPAT
/*
 * Create and map the vectors page for AArch32 tasks.
 */
enum aarch32_map {
	AA32_MAP_VECTORS, /* kuser helpers */
	AA32_MAP_SIGPAGE,
	AA32_MAP_VVAR,
	AA32_MAP_VDSO,
};

static struct page *aarch32_vectors_page __ro_after_init;
static struct page *aarch32_sig_page __ro_after_init;

static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm,
				  struct vm_area_struct *new_vma)
{
	current->mm->context.sigpage = (void *)new_vma->vm_start;

	return 0;
}

static struct vm_special_mapping aarch32_vdso_maps[] = {
	[AA32_MAP_VECTORS] = {
		.name = "[vectors]", /* ABI */
		.pages = &aarch32_vectors_page,
	},
	[AA32_MAP_SIGPAGE] = {
		.name = "[sigpage]", /* ABI */
		.pages = &aarch32_sig_page,
		.mremap = aarch32_sigpage_mremap,
	},
	[AA32_MAP_VVAR] = {
		.name = "[vvar]",
		.fault = vvar_fault,
	},
	[AA32_MAP_VDSO] = {
		.name = "[vdso]",
		.mremap = vdso_mremap,
	},
};

static int aarch32_alloc_kuser_vdso_page(void)
{
	extern char __kuser_helper_start[], __kuser_helper_end[];
	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
	unsigned long vdso_page;

	if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
		return 0;

	vdso_page = get_zeroed_page(GFP_KERNEL);
	if (!vdso_page)
		return -ENOMEM;

	memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
	       kuser_sz);
	aarch32_vectors_page = virt_to_page((void *)vdso_page);
	return 0;
}

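/*
 * 0xe7fddef1 sits in the permanently undefined A32 encoding space, so any
 * stray execution in the unused part of the sigpage traps rather than
 * running leftover data.
 */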
#define COMPAT_SIGPAGE_POISON_WORD 0xe7fddef1
static int aarch32_alloc_sigpage(void)
{
	extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
	int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
	__le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD);
	void *sigpage;

	sigpage = (void *)__get_free_page(GFP_KERNEL);
	if (!sigpage)
		return -ENOMEM;

	memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison));
	memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz);
	aarch32_sig_page = virt_to_page(sigpage);
	return 0;
}

static int __init __aarch32_alloc_vdso_pages(void)
{
	if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
		return 0;

	vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
	vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];

	return __vdso_init(VDSO_ABI_AA32);
}

static int __init aarch32_alloc_vdso_pages(void)
{
	int ret;

	ret = __aarch32_alloc_vdso_pages();
	if (ret)
		return ret;

	ret = aarch32_alloc_sigpage();
	if (ret)
		return ret;

	return aarch32_alloc_kuser_vdso_page();
}
arch_initcall(aarch32_alloc_vdso_pages);

static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
{
	void *ret;

	if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
		return 0;

	/*
	 * Avoid VM_MAYWRITE for compatibility with arch/arm/, where it's
	 * not safe to CoW the page containing the CPU exception vectors.
	 */
	ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
				       VM_READ | VM_EXEC |
				       VM_MAYREAD | VM_MAYEXEC,
				       &aarch32_vdso_maps[AA32_MAP_VECTORS]);

	return PTR_ERR_OR_ZERO(ret);
}

static int aarch32_sigreturn_setup(struct mm_struct *mm)
{
	unsigned long addr;
	void *ret;

	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = ERR_PTR(addr);
		goto out;
	}

	/*
	 * VM_MAYWRITE is required to allow gdb to Copy-on-Write and
	 * set breakpoints.
	 */
	ret = _install_special_mapping(mm, addr, PAGE_SIZE,
				       VM_READ | VM_EXEC | VM_MAYREAD |
				       VM_MAYWRITE | VM_MAYEXEC,
				       &aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
	if (IS_ERR(ret))
		goto out;

	mm->context.sigpage = (void *)addr;

out:
	return PTR_ERR_OR_ZERO(ret);
}

int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	int ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = aarch32_kuser_helpers_setup(mm);
	if (ret)
		goto out;

	if (IS_ENABLED(CONFIG_COMPAT_VDSO)) {
		ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm,
					       uses_interp);
		if (ret)
			goto out;
	}

	ret = aarch32_sigreturn_setup(mm);
out:
	mmap_write_unlock(mm);
	return ret;
}
#endif /* CONFIG_COMPAT */

enum aarch64_map {
	AA64_MAP_VVAR,
	AA64_MAP_VDSO,
};

static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
	[AA64_MAP_VVAR] = {
		.name = "[vvar]",
		.fault = vvar_fault,
	},
	[AA64_MAP_VDSO] = {
		.name = "[vdso]",
		.mremap = vdso_mremap,
	},
};

static int __init vdso_init(void)
{
	vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
	vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];

	return __vdso_init(VDSO_ABI_AA64);
}
arch_initcall(vdso_init);

int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	int ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp);
	mmap_write_unlock(mm);

	return ret;
}