xref: /linux/drivers/firmware/qemu_fw_cfg.c (revision a1c3be890440a1769ed6f822376a3e3ab0d42994)
1 /*
2  * drivers/firmware/qemu_fw_cfg.c
3  *
4  * Copyright 2015 Carnegie Mellon University
5  *
6  * Expose entries from QEMU's firmware configuration (fw_cfg) device in
7  * sysfs (read-only, under "/sys/firmware/qemu_fw_cfg/...").
8  *
9  * The fw_cfg device may be instantiated via either an ACPI node (on x86
10  * and select subsets of aarch64), a Device Tree node (on arm), or using
11  * a kernel module (or command line) parameter with the following syntax:
12  *
13  *      [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
14  * or
15  *      [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
16  *
17  * where:
18  *      <size>     := size of ioport or mmio range
19  *      <base>     := physical base address of ioport or mmio range
20  *      <ctrl_off> := (optional) offset of control register
21  *      <data_off> := (optional) offset of data register
22  *      <dma_off> := (optional) offset of dma register
23  *
24  * e.g.:
25  *      qemu_fw_cfg.ioport=12@0x510:0:1:4	(the default on x86)
26  * or
27  *      qemu_fw_cfg.mmio=16@0x9020000:8:0:16	(the default on arm)
28  */
29 
30 #include <linux/module.h>
31 #include <linux/mod_devicetable.h>
32 #include <linux/platform_device.h>
33 #include <linux/acpi.h>
34 #include <linux/slab.h>
35 #include <linux/io.h>
36 #include <linux/ioport.h>
37 #include <uapi/linux/qemu_fw_cfg.h>
38 #include <linux/delay.h>
39 #include <linux/crash_dump.h>
40 #include <linux/crash_core.h>
41 
/* Module metadata. */
MODULE_AUTHOR("Gabriel L. Somlo <somlo@cmu.edu>");
MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
MODULE_LICENSE("GPL");

/*
 * fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir.
 * Filled in by fw_cfg_sysfs_probe() from the FW_CFG_ID blob.
 */
static u32 fw_cfg_rev;

/*
 * fw_cfg device i/o register addresses, all set up by
 * fw_cfg_do_platform_probe():
 *   fw_cfg_is_mmio  - true for a memory-mapped device, false for ioport
 *   fw_cfg_p_base   - physical start of the claimed i/o range
 *   fw_cfg_p_size   - size of the claimed i/o range
 *   fw_cfg_dev_base - mapping of the range (ioremap() or ioport_map())
 *   fw_cfg_reg_*    - control, data and dma registers inside that mapping
 */
static bool fw_cfg_is_mmio;
static phys_addr_t fw_cfg_p_base;
static resource_size_t fw_cfg_p_size;
static void __iomem *fw_cfg_dev_base;
static void __iomem *fw_cfg_reg_ctrl;
static void __iomem *fw_cfg_reg_data;
static void __iomem *fw_cfg_reg_dma;

/*
 * atomic access to fw_cfg device (potentially slow i/o, so using mutex);
 * held around every select + transfer sequence in the blob accessors.
 */
static DEFINE_MUTEX(fw_cfg_dev_lock);
60 
61 /* pick appropriate endianness for selector key */
62 static void fw_cfg_sel_endianness(u16 key)
63 {
64 	if (fw_cfg_is_mmio)
65 		iowrite16be(key, fw_cfg_reg_ctrl);
66 	else
67 		iowrite16(key, fw_cfg_reg_ctrl);
68 }
69 
70 #ifdef CONFIG_CRASH_CORE
71 static inline bool fw_cfg_dma_enabled(void)
72 {
73 	return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
74 }
75 
76 /* qemu fw_cfg device is sync today, but spec says it may become async */
77 static void fw_cfg_wait_for_control(struct fw_cfg_dma_access *d)
78 {
79 	for (;;) {
80 		u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
81 
82 		/* do not reorder the read to d->control */
83 		rmb();
84 		if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
85 			return;
86 
87 		cpu_relax();
88 	}
89 }
90 
91 static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
92 {
93 	phys_addr_t dma;
94 	struct fw_cfg_dma_access *d = NULL;
95 	ssize_t ret = length;
96 
97 	d = kmalloc(sizeof(*d), GFP_KERNEL);
98 	if (!d) {
99 		ret = -ENOMEM;
100 		goto end;
101 	}
102 
103 	/* fw_cfg device does not need IOMMU protection, so use physical addresses */
104 	*d = (struct fw_cfg_dma_access) {
105 		.address = cpu_to_be64(address ? virt_to_phys(address) : 0),
106 		.length = cpu_to_be32(length),
107 		.control = cpu_to_be32(control)
108 	};
109 
110 	dma = virt_to_phys(d);
111 
112 	iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
113 	/* force memory to sync before notifying device via MMIO */
114 	wmb();
115 	iowrite32be(dma, fw_cfg_reg_dma + 4);
116 
117 	fw_cfg_wait_for_control(d);
118 
119 	if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
120 		ret = -EIO;
121 	}
122 
123 end:
124 	kfree(d);
125 
126 	return ret;
127 }
128 #endif
129 
130 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
131 static ssize_t fw_cfg_read_blob(u16 key,
132 				void *buf, loff_t pos, size_t count)
133 {
134 	u32 glk = -1U;
135 	acpi_status status;
136 
137 	/* If we have ACPI, ensure mutual exclusion against any potential
138 	 * device access by the firmware, e.g. via AML methods:
139 	 */
140 	status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
141 	if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
142 		/* Should never get here */
143 		WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
144 		memset(buf, 0, count);
145 		return -EINVAL;
146 	}
147 
148 	mutex_lock(&fw_cfg_dev_lock);
149 	fw_cfg_sel_endianness(key);
150 	while (pos-- > 0)
151 		ioread8(fw_cfg_reg_data);
152 	ioread8_rep(fw_cfg_reg_data, buf, count);
153 	mutex_unlock(&fw_cfg_dev_lock);
154 
155 	acpi_release_global_lock(glk);
156 	return count;
157 }
158 
159 #ifdef CONFIG_CRASH_CORE
160 /* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
161 static ssize_t fw_cfg_write_blob(u16 key,
162 				 void *buf, loff_t pos, size_t count)
163 {
164 	u32 glk = -1U;
165 	acpi_status status;
166 	ssize_t ret = count;
167 
168 	/* If we have ACPI, ensure mutual exclusion against any potential
169 	 * device access by the firmware, e.g. via AML methods:
170 	 */
171 	status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
172 	if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
173 		/* Should never get here */
174 		WARN(1, "%s: Failed to lock ACPI!\n", __func__);
175 		return -EINVAL;
176 	}
177 
178 	mutex_lock(&fw_cfg_dev_lock);
179 	if (pos == 0) {
180 		ret = fw_cfg_dma_transfer(buf, count, key << 16
181 					  | FW_CFG_DMA_CTL_SELECT
182 					  | FW_CFG_DMA_CTL_WRITE);
183 	} else {
184 		fw_cfg_sel_endianness(key);
185 		ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
186 		if (ret < 0)
187 			goto end;
188 		ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
189 	}
190 
191 end:
192 	mutex_unlock(&fw_cfg_dev_lock);
193 
194 	acpi_release_global_lock(glk);
195 
196 	return ret;
197 }
198 #endif /* CONFIG_CRASH_CORE */
199 
200 /* clean up fw_cfg device i/o */
201 static void fw_cfg_io_cleanup(void)
202 {
203 	if (fw_cfg_is_mmio) {
204 		iounmap(fw_cfg_dev_base);
205 		release_mem_region(fw_cfg_p_base, fw_cfg_p_size);
206 	} else {
207 		ioport_unmap(fw_cfg_dev_base);
208 		release_region(fw_cfg_p_base, fw_cfg_p_size);
209 	}
210 }
211 
/*
 * arch-specific ctrl & data register offsets are not available in ACPI, DT
 *
 * Unless both FW_CFG_CTRL_OFF and FW_CFG_DATA_OFF were already defined,
 * pick per-architecture defaults here. FW_CFG_DMA_OFF is only defined for
 * arm/arm64 and x86/sun4u; code using it must test for it with #ifdef.
 * Building for any other architecture is a compile-time error.
 */
#if !(defined(FW_CFG_CTRL_OFF) && defined(FW_CFG_DATA_OFF))
# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
#  define FW_CFG_CTRL_OFF 0x08
#  define FW_CFG_DATA_OFF 0x00
#  define FW_CFG_DMA_OFF 0x10
# elif defined(CONFIG_PARISC)	/* parisc */
#  define FW_CFG_CTRL_OFF 0x00
#  define FW_CFG_DATA_OFF 0x04
# elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m */
#  define FW_CFG_CTRL_OFF 0x00
#  define FW_CFG_DATA_OFF 0x02
# elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
#  define FW_CFG_CTRL_OFF 0x00
#  define FW_CFG_DATA_OFF 0x01
#  define FW_CFG_DMA_OFF 0x04
# else
#  error "QEMU FW_CFG not available on this architecture!"
# endif
#endif
232 
233 /* initialize fw_cfg device i/o from platform data */
234 static int fw_cfg_do_platform_probe(struct platform_device *pdev)
235 {
236 	char sig[FW_CFG_SIG_SIZE];
237 	struct resource *range, *ctrl, *data, *dma;
238 
239 	/* acquire i/o range details */
240 	fw_cfg_is_mmio = false;
241 	range = platform_get_resource(pdev, IORESOURCE_IO, 0);
242 	if (!range) {
243 		fw_cfg_is_mmio = true;
244 		range = platform_get_resource(pdev, IORESOURCE_MEM, 0);
245 		if (!range)
246 			return -EINVAL;
247 	}
248 	fw_cfg_p_base = range->start;
249 	fw_cfg_p_size = resource_size(range);
250 
251 	if (fw_cfg_is_mmio) {
252 		if (!request_mem_region(fw_cfg_p_base,
253 					fw_cfg_p_size, "fw_cfg_mem"))
254 			return -EBUSY;
255 		fw_cfg_dev_base = ioremap(fw_cfg_p_base, fw_cfg_p_size);
256 		if (!fw_cfg_dev_base) {
257 			release_mem_region(fw_cfg_p_base, fw_cfg_p_size);
258 			return -EFAULT;
259 		}
260 	} else {
261 		if (!request_region(fw_cfg_p_base,
262 				    fw_cfg_p_size, "fw_cfg_io"))
263 			return -EBUSY;
264 		fw_cfg_dev_base = ioport_map(fw_cfg_p_base, fw_cfg_p_size);
265 		if (!fw_cfg_dev_base) {
266 			release_region(fw_cfg_p_base, fw_cfg_p_size);
267 			return -EFAULT;
268 		}
269 	}
270 
271 	/* were custom register offsets provided (e.g. on the command line)? */
272 	ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
273 	data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
274 	dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
275 	if (ctrl && data) {
276 		fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
277 		fw_cfg_reg_data = fw_cfg_dev_base + data->start;
278 	} else {
279 		/* use architecture-specific offsets */
280 		fw_cfg_reg_ctrl = fw_cfg_dev_base + FW_CFG_CTRL_OFF;
281 		fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
282 	}
283 
284 	if (dma)
285 		fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
286 #ifdef FW_CFG_DMA_OFF
287 	else
288 		fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
289 #endif
290 
291 	/* verify fw_cfg device signature */
292 	if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
293 				0, FW_CFG_SIG_SIZE) < 0 ||
294 		memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
295 		fw_cfg_io_cleanup();
296 		return -ENODEV;
297 	}
298 
299 	return 0;
300 }
301 
302 static ssize_t fw_cfg_showrev(struct kobject *k, struct attribute *a, char *buf)
303 {
304 	return sprintf(buf, "%u\n", fw_cfg_rev);
305 }
306 
307 static const struct {
308 	struct attribute attr;
309 	ssize_t (*show)(struct kobject *k, struct attribute *a, char *buf);
310 } fw_cfg_rev_attr = {
311 	.attr = { .name = "rev", .mode = S_IRUSR },
312 	.show = fw_cfg_showrev,
313 };
314 
/*
 * fw_cfg_sysfs_entry type: one object per fw_cfg file, created by
 * fw_cfg_register_file() from the firmware's directory entry.
 */
struct fw_cfg_sysfs_entry {
	struct kobject kobj;	/* embedded kobject, see to_entry() */
	u32 size;		/* blob size, converted to CPU byte order */
	u16 select;		/* selector key, converted to CPU byte order */
	char name[FW_CFG_MAX_FILE_PATH];	/* file name from the directory */
	struct list_head list;	/* link in fw_cfg_entry_cache */
};
323 
324 #ifdef CONFIG_CRASH_CORE
325 static ssize_t fw_cfg_write_vmcoreinfo(const struct fw_cfg_file *f)
326 {
327 	static struct fw_cfg_vmcoreinfo *data;
328 	ssize_t ret;
329 
330 	data = kmalloc(sizeof(struct fw_cfg_vmcoreinfo), GFP_KERNEL);
331 	if (!data)
332 		return -ENOMEM;
333 
334 	*data = (struct fw_cfg_vmcoreinfo) {
335 		.guest_format = cpu_to_le16(FW_CFG_VMCOREINFO_FORMAT_ELF),
336 		.size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
337 		.paddr = cpu_to_le64(paddr_vmcoreinfo_note())
338 	};
339 	/* spare ourself reading host format support for now since we
340 	 * don't know what else to format - host may ignore ours
341 	 */
342 	ret = fw_cfg_write_blob(be16_to_cpu(f->select), data,
343 				0, sizeof(struct fw_cfg_vmcoreinfo));
344 
345 	kfree(data);
346 	return ret;
347 }
348 #endif /* CONFIG_CRASH_CORE */
349 
350 /* get fw_cfg_sysfs_entry from kobject member */
351 static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
352 {
353 	return container_of(kobj, struct fw_cfg_sysfs_entry, kobj);
354 }
355 
/*
 * fw_cfg_sysfs_attribute type: a sysfs attribute paired with a show
 * method that receives the owning fw_cfg_sysfs_entry instead of the raw
 * kobject.
 */
struct fw_cfg_sysfs_attribute {
	struct attribute attr;	/* embedded attribute, see to_attr() */
	ssize_t (*show)(struct fw_cfg_sysfs_entry *entry, char *buf);
};
361 
362 /* get fw_cfg_sysfs_attribute from attribute member */
363 static inline struct fw_cfg_sysfs_attribute *to_attr(struct attribute *attr)
364 {
365 	return container_of(attr, struct fw_cfg_sysfs_attribute, attr);
366 }
367 
/*
 * global cache of fw_cfg_sysfs_entry objects, linked through their 'list'
 * member; walked by fw_cfg_sysfs_cache_cleanup() on teardown
 */
static LIST_HEAD(fw_cfg_entry_cache);

/*
 * kobjects removed lazily by kernel, mutual exclusion needed; taken by the
 * enlist/delist helpers around every list modification
 */
static DEFINE_SPINLOCK(fw_cfg_cache_lock);
373 
374 static inline void fw_cfg_sysfs_cache_enlist(struct fw_cfg_sysfs_entry *entry)
375 {
376 	spin_lock(&fw_cfg_cache_lock);
377 	list_add_tail(&entry->list, &fw_cfg_entry_cache);
378 	spin_unlock(&fw_cfg_cache_lock);
379 }
380 
381 static inline void fw_cfg_sysfs_cache_delist(struct fw_cfg_sysfs_entry *entry)
382 {
383 	spin_lock(&fw_cfg_cache_lock);
384 	list_del(&entry->list);
385 	spin_unlock(&fw_cfg_cache_lock);
386 }
387 
388 static void fw_cfg_sysfs_cache_cleanup(void)
389 {
390 	struct fw_cfg_sysfs_entry *entry, *next;
391 
392 	list_for_each_entry_safe(entry, next, &fw_cfg_entry_cache, list) {
393 		/* will end up invoking fw_cfg_sysfs_cache_delist()
394 		 * via each object's release() method (i.e. destructor)
395 		 */
396 		kobject_put(&entry->kobj);
397 	}
398 }
399 
400 /* default_attrs: per-entry attributes and show methods */
401 
/*
 * Define fw_cfg_sysfs_attr_<_attr>: an owner-readable attribute named
 * "<_attr>" whose show method is fw_cfg_sysfs_show_<_attr>().
 */
#define FW_CFG_SYSFS_ATTR(_attr)					\
struct fw_cfg_sysfs_attribute fw_cfg_sysfs_attr_##_attr = {		\
	.show = fw_cfg_sysfs_show_##_attr,				\
	.attr = {							\
		.mode = 0400,						\
		.name = __stringify(_attr),				\
	},								\
}
407 
408 static ssize_t fw_cfg_sysfs_show_size(struct fw_cfg_sysfs_entry *e, char *buf)
409 {
410 	return sprintf(buf, "%u\n", e->size);
411 }
412 
413 static ssize_t fw_cfg_sysfs_show_key(struct fw_cfg_sysfs_entry *e, char *buf)
414 {
415 	return sprintf(buf, "%u\n", e->select);
416 }
417 
418 static ssize_t fw_cfg_sysfs_show_name(struct fw_cfg_sysfs_entry *e, char *buf)
419 {
420 	return sprintf(buf, "%s\n", e->name);
421 }
422 
/* instantiate fw_cfg_sysfs_attr_size, _key and _name */
static FW_CFG_SYSFS_ATTR(size);
static FW_CFG_SYSFS_ATTR(key);
static FW_CFG_SYSFS_ATTR(name);

/* NULL-terminated attribute list used as default_attrs of the entry ktype */
static struct attribute *fw_cfg_sysfs_entry_attrs[] = {
	&fw_cfg_sysfs_attr_size.attr,
	&fw_cfg_sysfs_attr_key.attr,
	&fw_cfg_sysfs_attr_name.attr,
	NULL,
};
433 
434 /* sysfs_ops: find fw_cfg_[entry, attribute] and call appropriate show method */
435 static ssize_t fw_cfg_sysfs_attr_show(struct kobject *kobj, struct attribute *a,
436 				      char *buf)
437 {
438 	struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
439 	struct fw_cfg_sysfs_attribute *attr = to_attr(a);
440 
441 	return attr->show(entry, buf);
442 }
443 
/* entry attributes are read-only: only a show method is provided */
static const struct sysfs_ops fw_cfg_sysfs_attr_ops = {
	.show = fw_cfg_sysfs_attr_show,
};
447 
/*
 * release: destructor, to be called via kobject_put(). Unlinks the entry
 * from the global cache and frees it.
 */
static void fw_cfg_sysfs_release_entry(struct kobject *kobj)
{
	struct fw_cfg_sysfs_entry *doomed;

	doomed = to_entry(kobj);
	fw_cfg_sysfs_cache_delist(doomed);
	kfree(doomed);
}
456 
/*
 * kobj_type: ties together all properties required to register an entry:
 * the default attributes (size, key, name), the sysfs_ops used to show
 * them, and the destructor run when the last reference is dropped.
 */
static struct kobj_type fw_cfg_sysfs_entry_ktype = {
	.default_attrs = fw_cfg_sysfs_entry_attrs,
	.sysfs_ops = &fw_cfg_sysfs_attr_ops,
	.release = fw_cfg_sysfs_release_entry,
};
463 
464 /* raw-read method and attribute */
465 static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, struct kobject *kobj,
466 				     struct bin_attribute *bin_attr,
467 				     char *buf, loff_t pos, size_t count)
468 {
469 	struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
470 
471 	if (pos > entry->size)
472 		return -EINVAL;
473 
474 	if (count > entry->size - pos)
475 		count = entry->size - pos;
476 
477 	return fw_cfg_read_blob(entry->select, buf, pos, count);
478 }
479 
/* owner-readable "raw" binary attribute, added to each entry's directory */
static struct bin_attribute fw_cfg_sysfs_attr_raw = {
	.attr = { .name = "raw", .mode = S_IRUSR },
	.read = fw_cfg_sysfs_read_raw,
};
484 
485 /*
486  * Create a kset subdirectory matching each '/' delimited dirname token
487  * in 'name', starting with sysfs kset/folder 'dir'; At the end, create
488  * a symlink directed at the given 'target'.
489  * NOTE: We do this on a best-effort basis, since 'name' is not guaranteed
490  * to be a well-behaved path name. Whenever a symlink vs. kset directory
491  * name collision occurs, the kernel will issue big scary warnings while
492  * refusing to add the offending link or directory. We follow up with our
493  * own, slightly less scary error messages explaining the situation :)
494  */
495 static int fw_cfg_build_symlink(struct kset *dir,
496 				struct kobject *target, const char *name)
497 {
498 	int ret;
499 	struct kset *subdir;
500 	struct kobject *ko;
501 	char *name_copy, *p, *tok;
502 
503 	if (!dir || !target || !name || !*name)
504 		return -EINVAL;
505 
506 	/* clone a copy of name for parsing */
507 	name_copy = p = kstrdup(name, GFP_KERNEL);
508 	if (!name_copy)
509 		return -ENOMEM;
510 
511 	/* create folders for each dirname token, then symlink for basename */
512 	while ((tok = strsep(&p, "/")) && *tok) {
513 
514 		/* last (basename) token? If so, add symlink here */
515 		if (!p || !*p) {
516 			ret = sysfs_create_link(&dir->kobj, target, tok);
517 			break;
518 		}
519 
520 		/* does the current dir contain an item named after tok ? */
521 		ko = kset_find_obj(dir, tok);
522 		if (ko) {
523 			/* drop reference added by kset_find_obj */
524 			kobject_put(ko);
525 
526 			/* ko MUST be a kset - we're about to use it as one ! */
527 			if (ko->ktype != dir->kobj.ktype) {
528 				ret = -EINVAL;
529 				break;
530 			}
531 
532 			/* descend into already existing subdirectory */
533 			dir = to_kset(ko);
534 		} else {
535 			/* create new subdirectory kset */
536 			subdir = kzalloc(sizeof(struct kset), GFP_KERNEL);
537 			if (!subdir) {
538 				ret = -ENOMEM;
539 				break;
540 			}
541 			subdir->kobj.kset = dir;
542 			subdir->kobj.ktype = dir->kobj.ktype;
543 			ret = kobject_set_name(&subdir->kobj, "%s", tok);
544 			if (ret) {
545 				kfree(subdir);
546 				break;
547 			}
548 			ret = kset_register(subdir);
549 			if (ret) {
550 				kfree(subdir);
551 				break;
552 			}
553 
554 			/* descend into newly created subdirectory */
555 			dir = subdir;
556 		}
557 	}
558 
559 	/* we're done with cloned copy of name */
560 	kfree(name_copy);
561 	return ret;
562 }
563 
564 /* recursively unregister fw_cfg/by_name/ kset directory tree */
565 static void fw_cfg_kset_unregister_recursive(struct kset *kset)
566 {
567 	struct kobject *k, *next;
568 
569 	list_for_each_entry_safe(k, next, &kset->list, entry)
570 		/* all set members are ksets too, but check just in case... */
571 		if (k->ktype == kset->kobj.ktype)
572 			fw_cfg_kset_unregister_recursive(to_kset(k));
573 
574 	/* symlinks are cleanly and automatically removed with the directory */
575 	kset_unregister(kset);
576 }
577 
/*
 * kobjects & kset representing top-level, by_key, and by_name folders:
 *   fw_cfg_top_ko     - /sys/firmware/qemu_fw_cfg, created at module init
 *   fw_cfg_sel_ko     - "by_key" subdirectory, created at probe time; a
 *                       non-NULL value makes further probes return -EBUSY
 *   fw_cfg_fname_kset - "by_name" subdirectory, created at probe time
 */
static struct kobject *fw_cfg_top_ko;
static struct kobject *fw_cfg_sel_ko;
static struct kset *fw_cfg_fname_kset;
582 
583 /* register an individual fw_cfg file */
584 static int fw_cfg_register_file(const struct fw_cfg_file *f)
585 {
586 	int err;
587 	struct fw_cfg_sysfs_entry *entry;
588 
589 #ifdef CONFIG_CRASH_CORE
590 	if (fw_cfg_dma_enabled() &&
591 		strcmp(f->name, FW_CFG_VMCOREINFO_FILENAME) == 0 &&
592 		!is_kdump_kernel()) {
593 		if (fw_cfg_write_vmcoreinfo(f) < 0)
594 			pr_warn("fw_cfg: failed to write vmcoreinfo");
595 	}
596 #endif
597 
598 	/* allocate new entry */
599 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
600 	if (!entry)
601 		return -ENOMEM;
602 
603 	/* set file entry information */
604 	entry->size = be32_to_cpu(f->size);
605 	entry->select = be16_to_cpu(f->select);
606 	memcpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH);
607 
608 	/* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */
609 	err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype,
610 				   fw_cfg_sel_ko, "%d", entry->select);
611 	if (err) {
612 		kobject_put(&entry->kobj);
613 		return err;
614 	}
615 
616 	/* add raw binary content access */
617 	err = sysfs_create_bin_file(&entry->kobj, &fw_cfg_sysfs_attr_raw);
618 	if (err)
619 		goto err_add_raw;
620 
621 	/* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */
622 	fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name);
623 
624 	/* success, add entry to global cache */
625 	fw_cfg_sysfs_cache_enlist(entry);
626 	return 0;
627 
628 err_add_raw:
629 	kobject_del(&entry->kobj);
630 	kfree(entry);
631 	return err;
632 }
633 
634 /* iterate over all fw_cfg directory entries, registering each one */
635 static int fw_cfg_register_dir_entries(void)
636 {
637 	int ret = 0;
638 	__be32 files_count;
639 	u32 count, i;
640 	struct fw_cfg_file *dir;
641 	size_t dir_size;
642 
643 	ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, &files_count,
644 			0, sizeof(files_count));
645 	if (ret < 0)
646 		return ret;
647 
648 	count = be32_to_cpu(files_count);
649 	dir_size = count * sizeof(struct fw_cfg_file);
650 
651 	dir = kmalloc(dir_size, GFP_KERNEL);
652 	if (!dir)
653 		return -ENOMEM;
654 
655 	ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, dir,
656 			sizeof(files_count), dir_size);
657 	if (ret < 0)
658 		goto end;
659 
660 	for (i = 0; i < count; i++) {
661 		ret = fw_cfg_register_file(&dir[i]);
662 		if (ret)
663 			break;
664 	}
665 
666 end:
667 	kfree(dir);
668 	return ret;
669 }
670 
/*
 * Tear down the top-level or by_key folder: remove @kobj from sysfs,
 * then drop one reference on it.
 */
static inline void fw_cfg_kobj_cleanup(struct kobject *kobj)
{
	struct kobject *folder = kobj;

	kobject_del(folder);
	kobject_put(folder);
}
677 
678 static int fw_cfg_sysfs_probe(struct platform_device *pdev)
679 {
680 	int err;
681 	__le32 rev;
682 
683 	/* NOTE: If we supported multiple fw_cfg devices, we'd first create
684 	 * a subdirectory named after e.g. pdev->id, then hang per-device
685 	 * by_key (and by_name) subdirectories underneath it. However, only
686 	 * one fw_cfg device exist system-wide, so if one was already found
687 	 * earlier, we might as well stop here.
688 	 */
689 	if (fw_cfg_sel_ko)
690 		return -EBUSY;
691 
692 	/* create by_key and by_name subdirs of /sys/firmware/qemu_fw_cfg/ */
693 	err = -ENOMEM;
694 	fw_cfg_sel_ko = kobject_create_and_add("by_key", fw_cfg_top_ko);
695 	if (!fw_cfg_sel_ko)
696 		goto err_sel;
697 	fw_cfg_fname_kset = kset_create_and_add("by_name", NULL, fw_cfg_top_ko);
698 	if (!fw_cfg_fname_kset)
699 		goto err_name;
700 
701 	/* initialize fw_cfg device i/o from platform data */
702 	err = fw_cfg_do_platform_probe(pdev);
703 	if (err)
704 		goto err_probe;
705 
706 	/* get revision number, add matching top-level attribute */
707 	err = fw_cfg_read_blob(FW_CFG_ID, &rev, 0, sizeof(rev));
708 	if (err < 0)
709 		goto err_probe;
710 
711 	fw_cfg_rev = le32_to_cpu(rev);
712 	err = sysfs_create_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
713 	if (err)
714 		goto err_rev;
715 
716 	/* process fw_cfg file directory entry, registering each file */
717 	err = fw_cfg_register_dir_entries();
718 	if (err)
719 		goto err_dir;
720 
721 	/* success */
722 	pr_debug("fw_cfg: loaded.\n");
723 	return 0;
724 
725 err_dir:
726 	fw_cfg_sysfs_cache_cleanup();
727 	sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
728 err_rev:
729 	fw_cfg_io_cleanup();
730 err_probe:
731 	fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
732 err_name:
733 	fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
734 err_sel:
735 	return err;
736 }
737 
738 static int fw_cfg_sysfs_remove(struct platform_device *pdev)
739 {
740 	pr_debug("fw_cfg: unloading.\n");
741 	fw_cfg_sysfs_cache_cleanup();
742 	sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
743 	fw_cfg_io_cleanup();
744 	fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
745 	fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
746 	return 0;
747 }
748 
/* Device Tree match table: binds to "qemu,fw-cfg-mmio" nodes */
static const struct of_device_id fw_cfg_sysfs_mmio_match[] = {
	{ .compatible = "qemu,fw-cfg-mmio", },
	{},
};
MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
754 
755 #ifdef CONFIG_ACPI
/* ACPI match table: binds to devices with the FW_CFG_ACPI_DEVICE_ID id */
static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
	{ FW_CFG_ACPI_DEVICE_ID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
761 #endif
762 
/*
 * Platform driver named "fw_cfg"; matches devices by name (as used for
 * the command-line device), by Device Tree compatible string, or by ACPI
 * id (the ACPI table is referenced through ACPI_PTR()).
 */
static struct platform_driver fw_cfg_sysfs_driver = {
	.probe = fw_cfg_sysfs_probe,
	.remove = fw_cfg_sysfs_remove,
	.driver = {
		.name = "fw_cfg",
		.of_match_table = fw_cfg_sysfs_mmio_match,
		.acpi_match_table = ACPI_PTR(fw_cfg_sysfs_acpi_match),
	},
};
772 
773 #ifdef CONFIG_FW_CFG_SYSFS_CMDLINE
774 
/* platform device created from the ioport=/mmio= parameter, if any */
static struct platform_device *fw_cfg_cmdline_dev;

/* this probably belongs in e.g. include/linux/types.h,
 * but right now we are the only ones doing it...
 *
 * Length modifier for scanning/printing phys_addr_t and resource_size_t
 * values: "ll" when physical addresses are 64 bits wide, none otherwise.
 */
#ifdef CONFIG_PHYS_ADDR_T_64BIT
#define __PHYS_ADDR_PREFIX "ll"
#else
#define __PHYS_ADDR_PREFIX ""
#endif

/* use special scanf/printf modifier for phys_addr_t, resource_size_t */

/* scan "@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]"; each %n records how
 * many characters were consumed up to the end of a complete chunk
 */
#define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
			 ":%" __PHYS_ADDR_PREFIX "i" \
			 ":%" __PHYS_ADDR_PREFIX "i%n" \
			 ":%" __PHYS_ADDR_PREFIX "i%n"

/* print "<size>@<base>" */
#define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
			 "0x%" __PHYS_ADDR_PREFIX "x"

/* print "<size>@<base>:<ctrl_off>:<data_off>" */
#define PH_ADDR_PR_3_FMT PH_ADDR_PR_1_FMT \
			 ":%" __PHYS_ADDR_PREFIX "u" \
			 ":%" __PHYS_ADDR_PREFIX "u"

/* print "<size>@<base>:<ctrl_off>:<data_off>:<dma_off>" */
#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
			 ":%" __PHYS_ADDR_PREFIX "u"
801 
802 static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
803 {
804 	struct resource res[4] = {};
805 	char *str;
806 	phys_addr_t base;
807 	resource_size_t size, ctrl_off, data_off, dma_off;
808 	int processed, consumed = 0;
809 
810 	/* only one fw_cfg device can exist system-wide, so if one
811 	 * was processed on the command line already, we might as
812 	 * well stop here.
813 	 */
814 	if (fw_cfg_cmdline_dev) {
815 		/* avoid leaking previously registered device */
816 		platform_device_unregister(fw_cfg_cmdline_dev);
817 		return -EINVAL;
818 	}
819 
820 	/* consume "<size>" portion of command line argument */
821 	size = memparse(arg, &str);
822 
823 	/* get "@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]" chunks */
824 	processed = sscanf(str, PH_ADDR_SCAN_FMT,
825 			   &base, &consumed,
826 			   &ctrl_off, &data_off, &consumed,
827 			   &dma_off, &consumed);
828 
829 	/* sscanf() must process precisely 1, 3 or 4 chunks:
830 	 * <base> is mandatory, optionally followed by <ctrl_off>
831 	 * and <data_off>, and <dma_off>;
832 	 * there must be no extra characters after the last chunk,
833 	 * so str[consumed] must be '\0'.
834 	 */
835 	if (str[consumed] ||
836 	    (processed != 1 && processed != 3 && processed != 4))
837 		return -EINVAL;
838 
839 	res[0].start = base;
840 	res[0].end = base + size - 1;
841 	res[0].flags = !strcmp(kp->name, "mmio") ? IORESOURCE_MEM :
842 						   IORESOURCE_IO;
843 
844 	/* insert register offsets, if provided */
845 	if (processed > 1) {
846 		res[1].name = "ctrl";
847 		res[1].start = ctrl_off;
848 		res[1].flags = IORESOURCE_REG;
849 		res[2].name = "data";
850 		res[2].start = data_off;
851 		res[2].flags = IORESOURCE_REG;
852 	}
853 	if (processed > 3) {
854 		res[3].name = "dma";
855 		res[3].start = dma_off;
856 		res[3].flags = IORESOURCE_REG;
857 	}
858 
859 	/* "processed" happens to nicely match the number of resources
860 	 * we need to pass in to this platform device.
861 	 */
862 	fw_cfg_cmdline_dev = platform_device_register_simple("fw_cfg",
863 					PLATFORM_DEVID_NONE, res, processed);
864 
865 	return PTR_ERR_OR_ZERO(fw_cfg_cmdline_dev);
866 }
867 
868 static int fw_cfg_cmdline_get(char *buf, const struct kernel_param *kp)
869 {
870 	/* stay silent if device was not configured via the command
871 	 * line, or if the parameter name (ioport/mmio) doesn't match
872 	 * the device setting
873 	 */
874 	if (!fw_cfg_cmdline_dev ||
875 	    (!strcmp(kp->name, "mmio") ^
876 	     (fw_cfg_cmdline_dev->resource[0].flags == IORESOURCE_MEM)))
877 		return 0;
878 
879 	switch (fw_cfg_cmdline_dev->num_resources) {
880 	case 1:
881 		return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_1_FMT,
882 				resource_size(&fw_cfg_cmdline_dev->resource[0]),
883 				fw_cfg_cmdline_dev->resource[0].start);
884 	case 3:
885 		return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_3_FMT,
886 				resource_size(&fw_cfg_cmdline_dev->resource[0]),
887 				fw_cfg_cmdline_dev->resource[0].start,
888 				fw_cfg_cmdline_dev->resource[1].start,
889 				fw_cfg_cmdline_dev->resource[2].start);
890 	case 4:
891 		return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_4_FMT,
892 				resource_size(&fw_cfg_cmdline_dev->resource[0]),
893 				fw_cfg_cmdline_dev->resource[0].start,
894 				fw_cfg_cmdline_dev->resource[1].start,
895 				fw_cfg_cmdline_dev->resource[2].start,
896 				fw_cfg_cmdline_dev->resource[3].start);
897 	}
898 
899 	/* Should never get here */
900 	WARN(1, "Unexpected number of resources: %d\n",
901 		fw_cfg_cmdline_dev->num_resources);
902 	return 0;
903 }
904 
/* set/get handlers shared by the "ioport" and "mmio" parameters; the
 * handlers tell the two apart by looking at kp->name
 */
static const struct kernel_param_ops fw_cfg_cmdline_param_ops = {
	.set = fw_cfg_cmdline_set,
	.get = fw_cfg_cmdline_get,
};

/* both parameters are owner-readable and carry no private argument */
device_param_cb(ioport, &fw_cfg_cmdline_param_ops, NULL, S_IRUSR);
device_param_cb(mmio, &fw_cfg_cmdline_param_ops, NULL, S_IRUSR);
912 
913 #endif /* CONFIG_FW_CFG_SYSFS_CMDLINE */
914 
915 static int __init fw_cfg_sysfs_init(void)
916 {
917 	int ret;
918 
919 	/* create /sys/firmware/qemu_fw_cfg/ top level directory */
920 	fw_cfg_top_ko = kobject_create_and_add("qemu_fw_cfg", firmware_kobj);
921 	if (!fw_cfg_top_ko)
922 		return -ENOMEM;
923 
924 	ret = platform_driver_register(&fw_cfg_sysfs_driver);
925 	if (ret)
926 		fw_cfg_kobj_cleanup(fw_cfg_top_ko);
927 
928 	return ret;
929 }
930 
931 static void __exit fw_cfg_sysfs_exit(void)
932 {
933 	platform_driver_unregister(&fw_cfg_sysfs_driver);
934 
935 #ifdef CONFIG_FW_CFG_SYSFS_CMDLINE
936 	platform_device_unregister(fw_cfg_cmdline_dev);
937 #endif
938 
939 	/* clean up /sys/firmware/qemu_fw_cfg/ */
940 	fw_cfg_kobj_cleanup(fw_cfg_top_ko);
941 }
942 
/* module entry and exit points */
module_init(fw_cfg_sysfs_init);
module_exit(fw_cfg_sysfs_exit);
945