1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * kexec.c - kexec_load system call 4 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> 5 */ 6 7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 8 9 #include <linux/capability.h> 10 #include <linux/mm.h> 11 #include <linux/file.h> 12 #include <linux/security.h> 13 #include <linux/kexec.h> 14 #include <linux/mutex.h> 15 #include <linux/list.h> 16 #include <linux/syscalls.h> 17 #include <linux/vmalloc.h> 18 #include <linux/slab.h> 19 20 #include "kexec_internal.h" 21 22 static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, 23 unsigned long nr_segments, 24 struct kexec_segment *segments, 25 unsigned long flags) 26 { 27 int ret; 28 struct kimage *image; 29 bool kexec_on_panic = flags & KEXEC_ON_CRASH; 30 31 #ifdef CONFIG_CRASH_DUMP 32 if (kexec_on_panic) { 33 /* Verify we have a valid entry point */ 34 if ((entry < phys_to_boot_phys(crashk_res.start)) || 35 (entry > phys_to_boot_phys(crashk_res.end))) 36 return -EADDRNOTAVAIL; 37 } 38 #endif 39 40 /* Allocate and initialize a controlling structure */ 41 image = do_kimage_alloc_init(); 42 if (!image) 43 return -ENOMEM; 44 45 image->start = entry; 46 image->nr_segments = nr_segments; 47 memcpy(image->segment, segments, nr_segments * sizeof(*segments)); 48 49 #ifdef CONFIG_CRASH_DUMP 50 if (kexec_on_panic) { 51 /* Enable special crash kernel control page alloc policy. */ 52 image->control_page = crashk_res.start; 53 image->type = KEXEC_TYPE_CRASH; 54 } 55 #endif 56 57 ret = sanity_check_segment_list(image); 58 if (ret) 59 goto out_free_image; 60 61 /* 62 * Find a location for the control code buffer, and add it 63 * the vector of segments so that it's pages will also be 64 * counted as destination pages. 65 */ 66 ret = -ENOMEM; 67 image->control_code_page = kimage_alloc_control_pages(image, 68 get_order(KEXEC_CONTROL_PAGE_SIZE)); 69 if (!image->control_code_page) { 70 pr_err("Could not allocate control_code_buffer\n"); 71 goto out_free_image; 72 } 73 74 if (!kexec_on_panic) { 75 image->swap_page = kimage_alloc_control_pages(image, 0); 76 if (!image->swap_page) { 77 pr_err("Could not allocate swap buffer\n"); 78 goto out_free_control_pages; 79 } 80 } 81 82 *rimage = image; 83 return 0; 84 out_free_control_pages: 85 kimage_free_page_list(&image->control_pages); 86 out_free_image: 87 kfree(image); 88 return ret; 89 } 90 91 static int do_kexec_load(unsigned long entry, unsigned long nr_segments, 92 struct kexec_segment *segments, unsigned long flags) 93 { 94 struct kimage **dest_image, *image; 95 unsigned long i; 96 int ret; 97 98 /* 99 * Because we write directly to the reserved memory region when loading 100 * crash kernels we need a serialization here to prevent multiple crash 101 * kernels from attempting to load simultaneously. 102 */ 103 if (!kexec_trylock()) 104 return -EBUSY; 105 106 #ifdef CONFIG_CRASH_DUMP 107 if (flags & KEXEC_ON_CRASH) { 108 dest_image = &kexec_crash_image; 109 if (kexec_crash_image) 110 arch_kexec_unprotect_crashkres(); 111 } else 112 #endif 113 dest_image = &kexec_image; 114 115 if (nr_segments == 0) { 116 /* Uninstall image */ 117 kimage_free(xchg(dest_image, NULL)); 118 ret = 0; 119 goto out_unlock; 120 } 121 if (flags & KEXEC_ON_CRASH) { 122 /* 123 * Loading another kernel to switch to if this one 124 * crashes. Free any current crash dump kernel before 125 * we corrupt it. 126 */ 127 kimage_free(xchg(&kexec_crash_image, NULL)); 128 } 129 130 ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags); 131 if (ret) 132 goto out_unlock; 133 134 if (flags & KEXEC_PRESERVE_CONTEXT) 135 image->preserve_context = 1; 136 137 #ifdef CONFIG_CRASH_HOTPLUG 138 if (flags & KEXEC_UPDATE_ELFCOREHDR) 139 image->update_elfcorehdr = 1; 140 #endif 141 142 ret = machine_kexec_prepare(image); 143 if (ret) 144 goto out; 145 146 /* 147 * Some architecture(like S390) may touch the crash memory before 148 * machine_kexec_prepare(), we must copy vmcoreinfo data after it. 149 */ 150 ret = kimage_crash_copy_vmcoreinfo(image); 151 if (ret) 152 goto out; 153 154 for (i = 0; i < nr_segments; i++) { 155 ret = kimage_load_segment(image, &image->segment[i]); 156 if (ret) 157 goto out; 158 } 159 160 kimage_terminate(image); 161 162 ret = machine_kexec_post_load(image); 163 if (ret) 164 goto out; 165 166 /* Install the new kernel and uninstall the old */ 167 image = xchg(dest_image, image); 168 169 out: 170 #ifdef CONFIG_CRASH_DUMP 171 if ((flags & KEXEC_ON_CRASH) && kexec_crash_image) 172 arch_kexec_protect_crashkres(); 173 #endif 174 175 kimage_free(image); 176 out_unlock: 177 kexec_unlock(); 178 return ret; 179 } 180 181 /* 182 * Exec Kernel system call: for obvious reasons only root may call it. 183 * 184 * This call breaks up into three pieces. 185 * - A generic part which loads the new kernel from the current 186 * address space, and very carefully places the data in the 187 * allocated pages. 188 * 189 * - A generic part that interacts with the kernel and tells all of 190 * the devices to shut down. Preventing on-going dmas, and placing 191 * the devices in a consistent state so a later kernel can 192 * reinitialize them. 193 * 194 * - A machine specific part that includes the syscall number 195 * and then copies the image to it's final destination. And 196 * jumps into the image at entry. 197 * 198 * kexec does not sync, or unmount filesystems so if you need 199 * that to happen you need to do that yourself. 200 */ 201 202 static inline int kexec_load_check(unsigned long nr_segments, 203 unsigned long flags) 204 { 205 int image_type = (flags & KEXEC_ON_CRASH) ? 206 KEXEC_TYPE_CRASH : KEXEC_TYPE_DEFAULT; 207 int result; 208 209 /* We only trust the superuser with rebooting the system. */ 210 if (!kexec_load_permitted(image_type)) 211 return -EPERM; 212 213 /* Permit LSMs and IMA to fail the kexec */ 214 result = security_kernel_load_data(LOADING_KEXEC_IMAGE, false); 215 if (result < 0) 216 return result; 217 218 /* 219 * kexec can be used to circumvent module loading restrictions, so 220 * prevent loading in that case 221 */ 222 result = security_locked_down(LOCKDOWN_KEXEC); 223 if (result) 224 return result; 225 226 /* 227 * Verify we have a legal set of flags 228 * This leaves us room for future extensions. 229 */ 230 if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK)) 231 return -EINVAL; 232 233 /* Put an artificial cap on the number 234 * of segments passed to kexec_load. 235 */ 236 if (nr_segments > KEXEC_SEGMENT_MAX) 237 return -EINVAL; 238 239 return 0; 240 } 241 242 SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, 243 struct kexec_segment __user *, segments, unsigned long, flags) 244 { 245 struct kexec_segment *ksegments; 246 unsigned long result; 247 248 result = kexec_load_check(nr_segments, flags); 249 if (result) 250 return result; 251 252 /* Verify we are on the appropriate architecture */ 253 if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) && 254 ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) 255 return -EINVAL; 256 257 ksegments = memdup_array_user(segments, nr_segments, sizeof(ksegments[0])); 258 if (IS_ERR(ksegments)) 259 return PTR_ERR(ksegments); 260 261 result = do_kexec_load(entry, nr_segments, ksegments, flags); 262 kfree(ksegments); 263 264 return result; 265 } 266 267 #ifdef CONFIG_COMPAT 268 COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, 269 compat_ulong_t, nr_segments, 270 struct compat_kexec_segment __user *, segments, 271 compat_ulong_t, flags) 272 { 273 struct compat_kexec_segment in; 274 struct kexec_segment *ksegments; 275 unsigned long i, result; 276 277 result = kexec_load_check(nr_segments, flags); 278 if (result) 279 return result; 280 281 /* Don't allow clients that don't understand the native 282 * architecture to do anything. 283 */ 284 if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT) 285 return -EINVAL; 286 287 ksegments = kmalloc_array(nr_segments, sizeof(ksegments[0]), 288 GFP_KERNEL); 289 if (!ksegments) 290 return -ENOMEM; 291 292 for (i = 0; i < nr_segments; i++) { 293 result = copy_from_user(&in, &segments[i], sizeof(in)); 294 if (result) 295 goto fail; 296 297 ksegments[i].buf = compat_ptr(in.buf); 298 ksegments[i].bufsz = in.bufsz; 299 ksegments[i].mem = in.mem; 300 ksegments[i].memsz = in.memsz; 301 } 302 303 result = do_kexec_load(entry, nr_segments, ksegments, flags); 304 305 fail: 306 kfree(ksegments); 307 return result; 308 } 309 #endif 310