// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2014 Intel Mobile Communications GmbH
 * Copyright(c) 2015 Intel Deutschland GmbH
 *
 * Author: Johannes Berg <johannes@sipsolutions.net>
 */
#include <linux/module.h>
#include <linux/device.h>
#include <linux/devcoredump.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/workqueue.h>

static struct class devcd_class;

/* global disable flag, for security purposes */
static bool devcd_disabled;

struct devcd_entry {
	struct device devcd_dev;
	void *data;
	size_t datalen;
	/*
	 * There are 2 races for which the mutex is required.
	 *
	 * The first race is between device creation and userspace writing to
	 * schedule immediate destruction.
	 *
	 * This race is handled by arming the timer before device creation,
	 * but when device creation fails the timer still exists.
	 *
	 * To solve this, hold the mutex during device_add(), and set
	 * init_completed on success before releasing the mutex.
	 *
	 * That way the timer cannot complete until device_add() has returned,
	 * and it does nothing if init_completed is not set. The timer is also
	 * cancelled in that case.
	 *
	 * The second race involves multiple parallel invocations of
	 * devcd_free(); add a deleted flag so only one of them can call the
	 * destructor.
	 */
	struct mutex mutex;
	bool init_completed, deleted;
	struct module *owner;
	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
			void *data, size_t datalen);
	void (*free)(void *data);
	/*
	 * If nothing interferes and device_add() returns success, del_wk
	 * will destroy the device after the timer fires.
	 *
	 * Multiple userspace processes can interfere in the working of the
	 * timer:
	 * - Writing to the coredump will reschedule the timer to run
	 *   immediately, if still armed.
	 *
	 *   This is handled by using "if (cancel_delayed_work()) {
	 *   schedule_delayed_work() }", to prevent re-arming after it has
	 *   already fired.
	 * - Writing to /sys/class/devcoredump/disabled will destroy the
	 *   coredump synchronously.
	 *   This is handled by using disable_delayed_work_sync(), and then
	 *   checking whether the deleted flag is set with &devcd->mutex held.
	 */
	struct delayed_work del_wk;
	struct device *failing_dev;
};
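/*
 * The init_completed handshake described above, in isolation: the sketch
 * below is a userspace pthread analogue (illustrative only, not part of
 * this file's build). The "timer" body takes the mutex and bails out
 * unless initialization completed, while the init path holds the mutex
 * across the fallible creation step, mirroring devcd_del() and
 * dev_coredumpm_timeout() further down.
 *
 *	#include <pthread.h>
 *	#include <stdbool.h>
 *	#include <stdio.h>
 *
 *	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
 *	static bool init_completed;
 *
 *	static void *timer_fn(void *arg)	// plays the role of devcd_del()
 *	{
 *		bool ok;
 *
 *		pthread_mutex_lock(&lock);
 *		ok = init_completed;
 *		pthread_mutex_unlock(&lock);
 *		printf(ok ? "destroy\n" : "init never completed, do nothing\n");
 *		return NULL;
 *	}
 *
 *	int main(void)
 *	{
 *		pthread_t timer;
 *
 *		pthread_create(&timer, NULL, timer_fn, NULL);	// "arm" first
 *		pthread_mutex_lock(&lock);
 *		init_completed = true;	// the fallible device_add() step, assumed to succeed
 *		pthread_mutex_unlock(&lock);
 *		pthread_join(timer, NULL);
 *		return 0;
 *	}
 */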
static struct devcd_entry *dev_to_devcd(struct device *dev)
{
	return container_of(dev, struct devcd_entry, devcd_dev);
}

static void devcd_dev_release(struct device *dev)
{
	struct devcd_entry *devcd = dev_to_devcd(dev);

	devcd->free(devcd->data);
	module_put(devcd->owner);

	/*
	 * this seems racy, but I don't see a notifier or such on
	 * a struct device to know when it goes away?
	 */
	if (devcd->failing_dev->kobj.sd)
		sysfs_delete_link(&devcd->failing_dev->kobj, &dev->kobj,
				  "devcoredump");

	put_device(devcd->failing_dev);
	kfree(devcd);
}

static void __devcd_del(struct devcd_entry *devcd)
{
	devcd->deleted = true;
	device_del(&devcd->devcd_dev);
	put_device(&devcd->devcd_dev);
}

static void devcd_del(struct work_struct *wk)
{
	struct devcd_entry *devcd;
	bool init_completed;

	devcd = container_of(wk, struct devcd_entry, del_wk.work);

	/* devcd->mutex serializes against dev_coredumpm_timeout() */
	mutex_lock(&devcd->mutex);
	init_completed = devcd->init_completed;
	mutex_unlock(&devcd->mutex);

	if (init_completed)
		__devcd_del(devcd);
}

static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
			       const struct bin_attribute *bin_attr,
			       char *buffer, loff_t offset, size_t count)
{
	struct device *dev = kobj_to_dev(kobj);
	struct devcd_entry *devcd = dev_to_devcd(dev);

	return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
}

static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
				const struct bin_attribute *bin_attr,
				char *buffer, loff_t offset, size_t count)
{
	struct device *dev = kobj_to_dev(kobj);
	struct devcd_entry *devcd = dev_to_devcd(dev);

	/*
	 * Although it's tempting to use mod_delayed_work() here, that will
	 * cause a reschedule if the timer already fired.
	 */
	if (cancel_delayed_work(&devcd->del_wk))
		schedule_delayed_work(&devcd->del_wk, 0);

	return count;
}

static const struct bin_attribute devcd_attr_data =
	__BIN_ATTR(data, 0600, devcd_data_read, devcd_data_write, 0);

static const struct bin_attribute *const devcd_dev_bin_attrs[] = {
	&devcd_attr_data, NULL,
};

static const struct attribute_group devcd_dev_group = {
	.bin_attrs = devcd_dev_bin_attrs,
};

static const struct attribute_group *devcd_dev_groups[] = {
	&devcd_dev_group, NULL,
};
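/*
 * Usage sketch for the "data" node above (userspace, illustrative only;
 * the "devcd1" path is an assumption, the actual name depends on the
 * devcd%d counter): reading retrieves the dump via devcd_data_read(),
 * and writing anything triggers devcd_data_write(), which schedules
 * immediate deletion.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/class/devcoredump/devcd1/data", O_RDWR);
 *
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);	// save the dump
 *		write(fd, "1", 1);			// tell the kernel to free it
 *		close(fd);
 *		return 0;
 *	}
 */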
static int devcd_free(struct device *dev, void *data)
{
	struct devcd_entry *devcd = dev_to_devcd(dev);

	/*
	 * To prevent a race with devcd_data_write(), disable the work and
	 * complete the deletion manually instead.
	 *
	 * We cannot rely on the return value of
	 * disable_delayed_work_sync() here, because it might be in the
	 * middle of a cancel_delayed_work + schedule_delayed_work pair.
	 *
	 * devcd->mutex here guards against multiple parallel invocations
	 * of devcd_free().
	 */
	disable_delayed_work_sync(&devcd->del_wk);
	mutex_lock(&devcd->mutex);
	if (!devcd->deleted)
		__devcd_del(devcd);
	mutex_unlock(&devcd->mutex);
	return 0;
}

static ssize_t disabled_show(const struct class *class, const struct class_attribute *attr,
			     char *buf)
{
	return sysfs_emit(buf, "%d\n", devcd_disabled);
}

/*
 * disabled_store()                                worker()
 *  class_for_each_device(&devcd_class,
 *                        NULL, NULL, devcd_free)
 *   ...
 *   while ((dev = class_dev_iter_next(&iter)))
 *                                                 devcd_del()
 *                                                   device_del()
 *                                                   put_device() <- last reference
 *    error = fn(dev, data)                        devcd_dev_release()
 *     devcd_free(dev, data)                         kfree(devcd)
 *
 * In the diagram above, disabled_store() appears to race with a devcd_del()
 * running in parallel: put_device() seems to drop the last reference, after
 * which fn(dev, data) would access freed memory. However, this cannot
 * happen, because fn(dev, data) runs with its own reference to the device,
 * taken via the klist_node by the class iterator, so the reference dropped
 * in devcd_del() is never the last one.
 */

static ssize_t disabled_store(const struct class *class, const struct class_attribute *attr,
			      const char *buf, size_t count)
{
	long tmp = simple_strtol(buf, NULL, 10);

	/*
	 * This essentially makes the attribute write-once, since you can't
	 * go back to not having it disabled. This is intentional, it serves
	 * as a system lockdown feature.
	 */
	if (tmp != 1)
		return -EINVAL;

	devcd_disabled = true;

	class_for_each_device(&devcd_class, NULL, NULL, devcd_free);

	return count;
}
static CLASS_ATTR_RW(disabled);

static struct attribute *devcd_class_attrs[] = {
	&class_attr_disabled.attr,
	NULL,
};
ATTRIBUTE_GROUPS(devcd_class);

static struct class devcd_class = {
	.name		= "devcoredump",
	.dev_release	= devcd_dev_release,
	.dev_groups	= devcd_dev_groups,
	.class_groups	= devcd_class_groups,
};

static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count,
			   void *data, size_t datalen)
{
	return memory_read_from_buffer(buffer, count, &offset, data, datalen);
}

static void devcd_freev(void *data)
{
	vfree(data);
}

/**
 * dev_coredumpv - create device coredump with vmalloc data
 * @dev: the struct device for the crashed device
 * @data: vmalloc data containing the device coredump
 * @datalen: length of the data
 * @gfp: allocation flags
 *
 * This function takes ownership of the vmalloc'ed data and will free
 * it when it is no longer used. See dev_coredumpm() for more information.
 */
void dev_coredumpv(struct device *dev, void *data, size_t datalen,
		   gfp_t gfp)
{
	dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, devcd_freev);
}
EXPORT_SYMBOL_GPL(dev_coredumpv);
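/*
 * Usage sketch for dev_coredumpv() (illustrative; "foo" and its members are
 * hypothetical): a driver snapshots its crashed firmware into a vmalloc'ed
 * buffer and hands ownership to devcoredump, which later frees it with
 * vfree() via devcd_freev().
 *
 *	static void foo_report_crash(struct foo_priv *foo)
 *	{
 *		size_t len = foo->fw_mem_size;
 *		void *dump = vmalloc(len);
 *
 *		if (!dump)
 *			return;
 *		memcpy(dump, foo->fw_mem, len);
 *		// dump now belongs to devcoredump; do not vfree() it here
 *		dev_coredumpv(foo->dev, dump, len, GFP_KERNEL);
 *	}
 */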
static int devcd_match_failing(struct device *dev, const void *failing)
{
	struct devcd_entry *devcd = dev_to_devcd(dev);

	return devcd->failing_dev == failing;
}

/**
 * devcd_free_sgtable - free all the memory of the given scatterlist table
 * (i.e. both pages and scatterlist instances)
 * NOTE: if two tables were allocated with devcd_alloc_sgtable and then
 * chained using the sg_chain function, then this function should be called
 * only once on the chained table
 * @data: pointer to sg_table to free
 */
static void devcd_free_sgtable(void *data)
{
	_devcd_free_sgtable(data);
}

/**
 * devcd_read_from_sgtable - copy data from sg_table to a given buffer
 * and return the number of bytes read
 * @buffer: the buffer to copy the data to
 * @buf_len: the length of the buffer
 * @data: the scatterlist table to copy from
 * @offset: start copying from @offset bytes from the head of the data
 *	in the given scatterlist
 * @data_len: the length of the data in the sg_table
 *
 * Returns: the number of bytes copied
 */
static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
				       size_t buf_len, void *data,
				       size_t data_len)
{
	struct scatterlist *table = data;

	if (offset > data_len)
		return -EINVAL;

	if (offset + buf_len > data_len)
		buf_len = data_len - offset;
	return sg_pcopy_to_buffer(table, sg_nents(table), buffer, buf_len,
				  offset);
}

/**
 * dev_coredump_put - remove device coredump
 * @dev: the struct device for the crashed device
 *
 * dev_coredump_put() removes the coredump, if one exists, for the given
 * device from the file system and frees its associated data; otherwise it
 * does nothing.
 *
 * It is useful for modules that do not want to keep the coredump
 * available after they are unloaded.
 */
void dev_coredump_put(struct device *dev)
{
	struct device *existing;

	existing = class_find_device(&devcd_class, NULL, dev,
				     devcd_match_failing);
	if (existing) {
		devcd_free(existing, NULL);
		put_device(existing);
	}
}
EXPORT_SYMBOL_GPL(dev_coredump_put);
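/*
 * Usage sketch for dev_coredump_put() (illustrative; the "foo" driver is
 * hypothetical): a driver can drop a still-pending coredump on removal so
 * that the dump does not outlive the module that knows how to interpret it.
 *
 *	static void foo_remove(struct platform_device *pdev)
 *	{
 *		// remove the dump created for this device, if one exists
 *		dev_coredump_put(&pdev->dev);
 *		// ... rest of teardown ...
 *	}
 */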
/**
 * dev_coredumpm_timeout - create device coredump with read/free methods with a
 * custom timeout.
 * @dev: the struct device for the crashed device
 * @owner: the module that contains the read/free functions, use %THIS_MODULE
 * @data: data cookie for the @read/@free functions
 * @datalen: length of the data
 * @gfp: allocation flags
 * @read: function to read from the given buffer
 * @free: function to free the given buffer
 * @timeout: time in jiffies to remove coredump
 *
 * Creates a new device coredump for the given device. If a previous one hasn't
 * been read yet, the new coredump is discarded. The data lifetime is determined
 * by the device coredump framework and when it is no longer needed the @free
 * function will be called to free the data.
 */
void dev_coredumpm_timeout(struct device *dev, struct module *owner,
			   void *data, size_t datalen, gfp_t gfp,
			   ssize_t (*read)(char *buffer, loff_t offset,
					   size_t count, void *data,
					   size_t datalen),
			   void (*free)(void *data),
			   unsigned long timeout)
{
	static atomic_t devcd_count = ATOMIC_INIT(0);
	struct devcd_entry *devcd;
	struct device *existing;

	if (devcd_disabled)
		goto free;

	existing = class_find_device(&devcd_class, NULL, dev,
				     devcd_match_failing);
	if (existing) {
		put_device(existing);
		goto free;
	}

	if (!try_module_get(owner))
		goto free;

	devcd = kzalloc(sizeof(*devcd), gfp);
	if (!devcd)
		goto put_module;

	devcd->owner = owner;
	devcd->data = data;
	devcd->datalen = datalen;
	devcd->read = read;
	devcd->free = free;
	devcd->failing_dev = get_device(dev);
	devcd->deleted = false;

	mutex_init(&devcd->mutex);
	device_initialize(&devcd->devcd_dev);

	dev_set_name(&devcd->devcd_dev, "devcd%d",
		     atomic_inc_return(&devcd_count));
	devcd->devcd_dev.class = &devcd_class;

	dev_set_uevent_suppress(&devcd->devcd_dev, true);

	/* devcd->mutex prevents devcd_del() completing until init finishes */
	mutex_lock(&devcd->mutex);
	devcd->init_completed = false;
	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
	schedule_delayed_work(&devcd->del_wk, timeout);

	if (device_add(&devcd->devcd_dev))
		goto put_device;

	/*
	 * These should normally not fail, but there is no problem
	 * continuing without the links, so just warn instead of
	 * failing.
	 */
	if (sysfs_create_link(&devcd->devcd_dev.kobj, &dev->kobj,
			      "failing_device") ||
	    sysfs_create_link(&dev->kobj, &devcd->devcd_dev.kobj,
			      "devcoredump"))
		dev_warn(dev, "devcoredump create_link failed\n");

	dev_set_uevent_suppress(&devcd->devcd_dev, false);
	kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);

	/*
	 * Safe to run devcd_del() now that we are done with devcd_dev.
	 * Alternatively we could have taken a ref on devcd_dev before
	 * dropping the lock.
	 */
	devcd->init_completed = true;
	mutex_unlock(&devcd->mutex);
	return;
put_device:
	mutex_unlock(&devcd->mutex);
	cancel_delayed_work_sync(&devcd->del_wk);
	put_device(&devcd->devcd_dev);

put_module:
	module_put(owner);
free:
	free(data);
}
EXPORT_SYMBOL_GPL(dev_coredumpm_timeout);
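/*
 * Usage sketch for dev_coredumpm_timeout() (illustrative; "foo", "dump" and
 * the chosen timeout are hypothetical): a driver with a kmalloc'ed dump
 * buffer supplies its own read/free callbacks, since dev_coredumpv() would
 * wrongly vfree() such a buffer, and keeps the dump around for five minutes.
 *
 *	static ssize_t foo_dump_read(char *buffer, loff_t offset, size_t count,
 *				     void *data, size_t datalen)
 *	{
 *		return memory_read_from_buffer(buffer, count, &offset,
 *					       data, datalen);
 *	}
 *
 *	static void foo_dump_free(void *data)
 *	{
 *		kfree(data);
 *	}
 *
 *	// dump was kmalloc'ed by the caller; devcoredump owns it from here on
 *	dev_coredumpm_timeout(foo->dev, THIS_MODULE, dump, dump_len,
 *			      GFP_KERNEL, foo_dump_read, foo_dump_free,
 *			      5 * 60 * HZ);
 */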
/**
 * dev_coredumpsg - create device coredump that uses scatterlist as data
 * parameter
 * @dev: the struct device for the crashed device
 * @table: the dump data
 * @datalen: length of the data
 * @gfp: allocation flags
 *
 * Creates a new device coredump for the given device. If a previous one hasn't
 * been read yet, the new coredump is discarded. The data lifetime is determined
 * by the device coredump framework and when it is no longer needed
 * it will free the data.
 */
void dev_coredumpsg(struct device *dev, struct scatterlist *table,
		    size_t datalen, gfp_t gfp)
{
	dev_coredumpm(dev, NULL, table, datalen, gfp, devcd_read_from_sgtable,
		      devcd_free_sgtable);
}
EXPORT_SYMBOL_GPL(dev_coredumpsg);

static int __init devcoredump_init(void)
{
	return class_register(&devcd_class);
}
__initcall(devcoredump_init);

static void __exit devcoredump_exit(void)
{
	class_for_each_device(&devcd_class, NULL, NULL, devcd_free);
	class_unregister(&devcd_class);
}
__exitcall(devcoredump_exit);
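/*
 * Usage sketch for dev_coredumpsg() (illustrative; foo_alloc_sg_dump() is a
 * hypothetical driver helper): the scatterlist must be built in a way that
 * matches how devcd_free_sgtable() tears it down, i.e. page-backed entries
 * the framework may free, so drivers typically provide their own
 * table-building helper.
 *
 *	struct scatterlist *table;
 *	size_t dump_len;
 *
 *	table = foo_alloc_sg_dump(foo, &dump_len);	// fills pages with dump data
 *	if (table)
 *		dev_coredumpsg(foo->dev, table, dump_len, GFP_KERNEL);
 *	// on success, devcoredump owns the table and frees it when done
 */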