1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(c) 2014 Intel Mobile Communications GmbH 4 * Copyright(c) 2015 Intel Deutschland GmbH 5 * 6 * Author: Johannes Berg <johannes@sipsolutions.net> 7 */ 8 #include <linux/module.h> 9 #include <linux/device.h> 10 #include <linux/devcoredump.h> 11 #include <linux/list.h> 12 #include <linux/slab.h> 13 #include <linux/fs.h> 14 #include <linux/workqueue.h> 15 16 static struct class devcd_class; 17 18 /* global disable flag, for security purposes */ 19 static bool devcd_disabled; 20 21 struct devcd_entry { 22 struct device devcd_dev; 23 void *data; 24 size_t datalen; 25 /* 26 * Here, mutex is required to serialize the calls to del_wk work between 27 * user/kernel space which happens when devcd is added with device_add() 28 * and that sends uevent to user space. User space reads the uevents, 29 * and calls to devcd_data_write() which try to modify the work which is 30 * not even initialized/queued from devcoredump. 31 * 32 * 33 * 34 * cpu0(X) cpu1(Y) 35 * 36 * dev_coredump() uevent sent to user space 37 * device_add() ======================> user space process Y reads the 38 * uevents writes to devcd fd 39 * which results into writes to 40 * 41 * devcd_data_write() 42 * mod_delayed_work() 43 * try_to_grab_pending() 44 * del_timer() 45 * debug_assert_init() 46 * INIT_DELAYED_WORK() 47 * schedule_delayed_work() 48 * 49 * 50 * Also, mutex alone would not be enough to avoid scheduling of 51 * del_wk work after it get flush from a call to devcd_free() 52 * mentioned as below. 53 * 54 * disabled_store() 55 * devcd_free() 56 * mutex_lock() devcd_data_write() 57 * flush_delayed_work() 58 * mutex_unlock() 59 * mutex_lock() 60 * mod_delayed_work() 61 * mutex_unlock() 62 * So, delete_work flag is required. 63 */ 64 struct mutex mutex; 65 bool delete_work; 66 struct module *owner; 67 ssize_t (*read)(char *buffer, loff_t offset, size_t count, 68 void *data, size_t datalen); 69 void (*free)(void *data); 70 struct delayed_work del_wk; 71 struct device *failing_dev; 72 }; 73 74 static struct devcd_entry *dev_to_devcd(struct device *dev) 75 { 76 return container_of(dev, struct devcd_entry, devcd_dev); 77 } 78 79 static void devcd_dev_release(struct device *dev) 80 { 81 struct devcd_entry *devcd = dev_to_devcd(dev); 82 83 devcd->free(devcd->data); 84 module_put(devcd->owner); 85 86 /* 87 * this seems racy, but I don't see a notifier or such on 88 * a struct device to know when it goes away? 89 */ 90 if (devcd->failing_dev->kobj.sd) 91 sysfs_delete_link(&devcd->failing_dev->kobj, &dev->kobj, 92 "devcoredump"); 93 94 put_device(devcd->failing_dev); 95 kfree(devcd); 96 } 97 98 static void devcd_del(struct work_struct *wk) 99 { 100 struct devcd_entry *devcd; 101 102 devcd = container_of(wk, struct devcd_entry, del_wk.work); 103 104 device_del(&devcd->devcd_dev); 105 put_device(&devcd->devcd_dev); 106 } 107 108 static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, 109 const struct bin_attribute *bin_attr, 110 char *buffer, loff_t offset, size_t count) 111 { 112 struct device *dev = kobj_to_dev(kobj); 113 struct devcd_entry *devcd = dev_to_devcd(dev); 114 115 return devcd->read(buffer, offset, count, devcd->data, devcd->datalen); 116 } 117 118 static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, 119 const struct bin_attribute *bin_attr, 120 char *buffer, loff_t offset, size_t count) 121 { 122 struct device *dev = kobj_to_dev(kobj); 123 struct devcd_entry *devcd = dev_to_devcd(dev); 124 125 mutex_lock(&devcd->mutex); 126 if (!devcd->delete_work) { 127 devcd->delete_work = true; 128 mod_delayed_work(system_wq, &devcd->del_wk, 0); 129 } 130 mutex_unlock(&devcd->mutex); 131 132 return count; 133 } 134 135 static const struct bin_attribute devcd_attr_data = 136 __BIN_ATTR(data, 0600, devcd_data_read, devcd_data_write, 0); 137 138 static const struct bin_attribute *const devcd_dev_bin_attrs[] = { 139 &devcd_attr_data, NULL, 140 }; 141 142 static const struct attribute_group devcd_dev_group = { 143 .bin_attrs_new = devcd_dev_bin_attrs, 144 }; 145 146 static const struct attribute_group *devcd_dev_groups[] = { 147 &devcd_dev_group, NULL, 148 }; 149 150 static int devcd_free(struct device *dev, void *data) 151 { 152 struct devcd_entry *devcd = dev_to_devcd(dev); 153 154 mutex_lock(&devcd->mutex); 155 if (!devcd->delete_work) 156 devcd->delete_work = true; 157 158 flush_delayed_work(&devcd->del_wk); 159 mutex_unlock(&devcd->mutex); 160 return 0; 161 } 162 163 static ssize_t disabled_show(const struct class *class, const struct class_attribute *attr, 164 char *buf) 165 { 166 return sysfs_emit(buf, "%d\n", devcd_disabled); 167 } 168 169 /* 170 * 171 * disabled_store() worker() 172 * class_for_each_device(&devcd_class, 173 * NULL, NULL, devcd_free) 174 * ... 175 * ... 176 * while ((dev = class_dev_iter_next(&iter)) 177 * devcd_del() 178 * device_del() 179 * put_device() <- last reference 180 * error = fn(dev, data) devcd_dev_release() 181 * devcd_free(dev, data) kfree(devcd) 182 * mutex_lock(&devcd->mutex); 183 * 184 * 185 * In the above diagram, it looks like disabled_store() would be racing with parallelly 186 * running devcd_del() and result in memory abort while acquiring devcd->mutex which 187 * is called after kfree of devcd memory after dropping its last reference with 188 * put_device(). However, this will not happens as fn(dev, data) runs 189 * with its own reference to device via klist_node so it is not its last reference. 190 * so, above situation would not occur. 191 */ 192 193 static ssize_t disabled_store(const struct class *class, const struct class_attribute *attr, 194 const char *buf, size_t count) 195 { 196 long tmp = simple_strtol(buf, NULL, 10); 197 198 /* 199 * This essentially makes the attribute write-once, since you can't 200 * go back to not having it disabled. This is intentional, it serves 201 * as a system lockdown feature. 202 */ 203 if (tmp != 1) 204 return -EINVAL; 205 206 devcd_disabled = true; 207 208 class_for_each_device(&devcd_class, NULL, NULL, devcd_free); 209 210 return count; 211 } 212 static CLASS_ATTR_RW(disabled); 213 214 static struct attribute *devcd_class_attrs[] = { 215 &class_attr_disabled.attr, 216 NULL, 217 }; 218 ATTRIBUTE_GROUPS(devcd_class); 219 220 static struct class devcd_class = { 221 .name = "devcoredump", 222 .dev_release = devcd_dev_release, 223 .dev_groups = devcd_dev_groups, 224 .class_groups = devcd_class_groups, 225 }; 226 227 static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count, 228 void *data, size_t datalen) 229 { 230 return memory_read_from_buffer(buffer, count, &offset, data, datalen); 231 } 232 233 static void devcd_freev(void *data) 234 { 235 vfree(data); 236 } 237 238 /** 239 * dev_coredumpv - create device coredump with vmalloc data 240 * @dev: the struct device for the crashed device 241 * @data: vmalloc data containing the device coredump 242 * @datalen: length of the data 243 * @gfp: allocation flags 244 * 245 * This function takes ownership of the vmalloc'ed data and will free 246 * it when it is no longer used. See dev_coredumpm() for more information. 247 */ 248 void dev_coredumpv(struct device *dev, void *data, size_t datalen, 249 gfp_t gfp) 250 { 251 dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, devcd_freev); 252 } 253 EXPORT_SYMBOL_GPL(dev_coredumpv); 254 255 static int devcd_match_failing(struct device *dev, const void *failing) 256 { 257 struct devcd_entry *devcd = dev_to_devcd(dev); 258 259 return devcd->failing_dev == failing; 260 } 261 262 /** 263 * devcd_free_sgtable - free all the memory of the given scatterlist table 264 * (i.e. both pages and scatterlist instances) 265 * NOTE: if two tables allocated with devcd_alloc_sgtable and then chained 266 * using the sg_chain function then that function should be called only once 267 * on the chained table 268 * @data: pointer to sg_table to free 269 */ 270 static void devcd_free_sgtable(void *data) 271 { 272 _devcd_free_sgtable(data); 273 } 274 275 /** 276 * devcd_read_from_sgtable - copy data from sg_table to a given buffer 277 * and return the number of bytes read 278 * @buffer: the buffer to copy the data to it 279 * @buf_len: the length of the buffer 280 * @data: the scatterlist table to copy from 281 * @offset: start copy from @offset@ bytes from the head of the data 282 * in the given scatterlist 283 * @data_len: the length of the data in the sg_table 284 * 285 * Returns: the number of bytes copied 286 */ 287 static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset, 288 size_t buf_len, void *data, 289 size_t data_len) 290 { 291 struct scatterlist *table = data; 292 293 if (offset > data_len) 294 return -EINVAL; 295 296 if (offset + buf_len > data_len) 297 buf_len = data_len - offset; 298 return sg_pcopy_to_buffer(table, sg_nents(table), buffer, buf_len, 299 offset); 300 } 301 302 /** 303 * dev_coredump_put - remove device coredump 304 * @dev: the struct device for the crashed device 305 * 306 * dev_coredump_put() removes coredump, if exists, for a given device from 307 * the file system and free its associated data otherwise, does nothing. 308 * 309 * It is useful for modules that do not want to keep coredump 310 * available after its unload. 311 */ 312 void dev_coredump_put(struct device *dev) 313 { 314 struct device *existing; 315 316 existing = class_find_device(&devcd_class, NULL, dev, 317 devcd_match_failing); 318 if (existing) { 319 devcd_free(existing, NULL); 320 put_device(existing); 321 } 322 } 323 EXPORT_SYMBOL_GPL(dev_coredump_put); 324 325 /** 326 * dev_coredumpm_timeout - create device coredump with read/free methods with a 327 * custom timeout. 328 * @dev: the struct device for the crashed device 329 * @owner: the module that contains the read/free functions, use %THIS_MODULE 330 * @data: data cookie for the @read/@free functions 331 * @datalen: length of the data 332 * @gfp: allocation flags 333 * @read: function to read from the given buffer 334 * @free: function to free the given buffer 335 * @timeout: time in jiffies to remove coredump 336 * 337 * Creates a new device coredump for the given device. If a previous one hasn't 338 * been read yet, the new coredump is discarded. The data lifetime is determined 339 * by the device coredump framework and when it is no longer needed the @free 340 * function will be called to free the data. 341 */ 342 void dev_coredumpm_timeout(struct device *dev, struct module *owner, 343 void *data, size_t datalen, gfp_t gfp, 344 ssize_t (*read)(char *buffer, loff_t offset, 345 size_t count, void *data, 346 size_t datalen), 347 void (*free)(void *data), 348 unsigned long timeout) 349 { 350 static atomic_t devcd_count = ATOMIC_INIT(0); 351 struct devcd_entry *devcd; 352 struct device *existing; 353 354 if (devcd_disabled) 355 goto free; 356 357 existing = class_find_device(&devcd_class, NULL, dev, 358 devcd_match_failing); 359 if (existing) { 360 put_device(existing); 361 goto free; 362 } 363 364 if (!try_module_get(owner)) 365 goto free; 366 367 devcd = kzalloc(sizeof(*devcd), gfp); 368 if (!devcd) 369 goto put_module; 370 371 devcd->owner = owner; 372 devcd->data = data; 373 devcd->datalen = datalen; 374 devcd->read = read; 375 devcd->free = free; 376 devcd->failing_dev = get_device(dev); 377 devcd->delete_work = false; 378 379 mutex_init(&devcd->mutex); 380 device_initialize(&devcd->devcd_dev); 381 382 dev_set_name(&devcd->devcd_dev, "devcd%d", 383 atomic_inc_return(&devcd_count)); 384 devcd->devcd_dev.class = &devcd_class; 385 386 mutex_lock(&devcd->mutex); 387 dev_set_uevent_suppress(&devcd->devcd_dev, true); 388 if (device_add(&devcd->devcd_dev)) 389 goto put_device; 390 391 /* 392 * These should normally not fail, but there is no problem 393 * continuing without the links, so just warn instead of 394 * failing. 395 */ 396 if (sysfs_create_link(&devcd->devcd_dev.kobj, &dev->kobj, 397 "failing_device") || 398 sysfs_create_link(&dev->kobj, &devcd->devcd_dev.kobj, 399 "devcoredump")) 400 dev_warn(dev, "devcoredump create_link failed\n"); 401 402 dev_set_uevent_suppress(&devcd->devcd_dev, false); 403 kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); 404 INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); 405 schedule_delayed_work(&devcd->del_wk, timeout); 406 mutex_unlock(&devcd->mutex); 407 return; 408 put_device: 409 put_device(&devcd->devcd_dev); 410 mutex_unlock(&devcd->mutex); 411 put_module: 412 module_put(owner); 413 free: 414 free(data); 415 } 416 EXPORT_SYMBOL_GPL(dev_coredumpm_timeout); 417 418 /** 419 * dev_coredumpsg - create device coredump that uses scatterlist as data 420 * parameter 421 * @dev: the struct device for the crashed device 422 * @table: the dump data 423 * @datalen: length of the data 424 * @gfp: allocation flags 425 * 426 * Creates a new device coredump for the given device. If a previous one hasn't 427 * been read yet, the new coredump is discarded. The data lifetime is determined 428 * by the device coredump framework and when it is no longer needed 429 * it will free the data. 430 */ 431 void dev_coredumpsg(struct device *dev, struct scatterlist *table, 432 size_t datalen, gfp_t gfp) 433 { 434 dev_coredumpm(dev, NULL, table, datalen, gfp, devcd_read_from_sgtable, 435 devcd_free_sgtable); 436 } 437 EXPORT_SYMBOL_GPL(dev_coredumpsg); 438 439 static int __init devcoredump_init(void) 440 { 441 return class_register(&devcd_class); 442 } 443 __initcall(devcoredump_init); 444 445 static void __exit devcoredump_exit(void) 446 { 447 class_for_each_device(&devcd_class, NULL, NULL, devcd_free); 448 class_unregister(&devcd_class); 449 } 450 __exitcall(devcoredump_exit); 451