xref: /linux/drivers/cxl/core/memdev.c (revision 06ba8020287f43fc13962b158d8dec2689448a5a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2020 Intel Corporation. */
3 
4 #include <linux/device.h>
5 #include <linux/slab.h>
6 #include <linux/idr.h>
7 #include <linux/pci.h>
8 #include <cxlmem.h>
9 #include "trace.h"
10 #include "core.h"
11 
/*
 * Taken for read around ioctl dispatch and for write when the exclusive
 * command bitmap is changed or a memdev's cxlds pointer is cleared.
 */
static DECLARE_RWSEM(cxl_memdev_rwsem);

/*
 * An entire PCI topology full of devices should be enough for any
 * config
 */
#define CXL_MEM_MAX_DEVS 65536

/* Char device major number, assigned in cxl_memdev_init() */
static int cxl_mem_major;
/* Allocator for memdev ids; the id is also used as the char device minor */
static DEFINE_IDA(cxl_memdev_ida);
22 
23 static void cxl_memdev_release(struct device *dev)
24 {
25 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
26 
27 	ida_free(&cxl_memdev_ida, cxlmd->id);
28 	kfree(cxlmd);
29 }
30 
31 static char *cxl_memdev_devnode(const struct device *dev, umode_t *mode, kuid_t *uid,
32 				kgid_t *gid)
33 {
34 	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
35 }
36 
37 static ssize_t firmware_version_show(struct device *dev,
38 				     struct device_attribute *attr, char *buf)
39 {
40 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
41 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
42 
43 	return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version);
44 }
45 static DEVICE_ATTR_RO(firmware_version);
46 
47 static ssize_t payload_max_show(struct device *dev,
48 				struct device_attribute *attr, char *buf)
49 {
50 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
51 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
52 
53 	return sysfs_emit(buf, "%zu\n", cxlds->payload_size);
54 }
55 static DEVICE_ATTR_RO(payload_max);
56 
57 static ssize_t label_storage_size_show(struct device *dev,
58 				       struct device_attribute *attr, char *buf)
59 {
60 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
61 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
62 
63 	return sysfs_emit(buf, "%zu\n", cxlds->lsa_size);
64 }
65 static DEVICE_ATTR_RO(label_storage_size);
66 
67 static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
68 			     char *buf)
69 {
70 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
71 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
72 	unsigned long long len = resource_size(&cxlds->ram_res);
73 
74 	return sysfs_emit(buf, "%#llx\n", len);
75 }
76 
77 static struct device_attribute dev_attr_ram_size =
78 	__ATTR(size, 0444, ram_size_show, NULL);
79 
80 static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
81 			      char *buf)
82 {
83 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
84 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
85 	unsigned long long len = resource_size(&cxlds->pmem_res);
86 
87 	return sysfs_emit(buf, "%#llx\n", len);
88 }
89 
90 static struct device_attribute dev_attr_pmem_size =
91 	__ATTR(size, 0444, pmem_size_show, NULL);
92 
93 static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
94 			   char *buf)
95 {
96 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
97 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
98 
99 	return sysfs_emit(buf, "%#llx\n", cxlds->serial);
100 }
101 static DEVICE_ATTR_RO(serial);
102 
103 static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
104 			      char *buf)
105 {
106 	return sprintf(buf, "%d\n", dev_to_node(dev));
107 }
108 static DEVICE_ATTR_RO(numa_node);
109 
110 static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
111 {
112 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
113 	u64 offset, length;
114 	int rc = 0;
115 
116 	/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
117 	if (resource_size(&cxlds->pmem_res)) {
118 		offset = cxlds->pmem_res.start;
119 		length = resource_size(&cxlds->pmem_res);
120 		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
121 		if (rc)
122 			return rc;
123 	}
124 	if (resource_size(&cxlds->ram_res)) {
125 		offset = cxlds->ram_res.start;
126 		length = resource_size(&cxlds->ram_res);
127 		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
128 		/*
129 		 * Invalid Physical Address is not an error for
130 		 * volatile addresses. Device support is optional.
131 		 */
132 		if (rc == -EFAULT)
133 			rc = 0;
134 	}
135 	return rc;
136 }
137 
138 int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
139 {
140 	struct cxl_port *port;
141 	int rc;
142 
143 	port = dev_get_drvdata(&cxlmd->dev);
144 	if (!port || !is_cxl_endpoint(port))
145 		return -EINVAL;
146 
147 	rc = down_read_interruptible(&cxl_dpa_rwsem);
148 	if (rc)
149 		return rc;
150 
151 	if (port->commit_end == -1) {
152 		/* No regions mapped to this memdev */
153 		rc = cxl_get_poison_by_memdev(cxlmd);
154 	} else {
155 		/* Regions mapped, collect poison by endpoint */
156 		rc =  cxl_get_poison_by_endpoint(port);
157 	}
158 	up_read(&cxl_dpa_rwsem);
159 
160 	return rc;
161 }
162 EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);
163 
/* Argument passed to __cxl_dpa_to_region() through device_for_each_child() */
struct cxl_dpa_to_region_context {
	/* Output: region of the decoder found to cover @dpa */
	struct cxl_region *cxlr;
	/* Input: device physical address being looked up */
	u64 dpa;
};
168 
169 static int __cxl_dpa_to_region(struct device *dev, void *arg)
170 {
171 	struct cxl_dpa_to_region_context *ctx = arg;
172 	struct cxl_endpoint_decoder *cxled;
173 	u64 dpa = ctx->dpa;
174 
175 	if (!is_endpoint_decoder(dev))
176 		return 0;
177 
178 	cxled = to_cxl_endpoint_decoder(dev);
179 	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
180 		return 0;
181 
182 	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
183 		return 0;
184 
185 	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
186 		dev_name(&cxled->cxld.region->dev));
187 
188 	ctx->cxlr = cxled->cxld.region;
189 
190 	return 1;
191 }
192 
193 static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
194 {
195 	struct cxl_dpa_to_region_context ctx;
196 	struct cxl_port *port;
197 
198 	ctx = (struct cxl_dpa_to_region_context) {
199 		.dpa = dpa,
200 	};
201 	port = dev_get_drvdata(&cxlmd->dev);
202 	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
203 		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);
204 
205 	return ctx.cxlr;
206 }
207 
208 static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
209 {
210 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
211 
212 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
213 		return 0;
214 
215 	if (!resource_size(&cxlds->dpa_res)) {
216 		dev_dbg(cxlds->dev, "device has no dpa resource\n");
217 		return -EINVAL;
218 	}
219 	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
220 		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
221 			dpa, &cxlds->dpa_res);
222 		return -EINVAL;
223 	}
224 	if (!IS_ALIGNED(dpa, 64)) {
225 		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
226 		return -EINVAL;
227 	}
228 
229 	return 0;
230 }
231 
232 int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
233 {
234 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
235 	struct cxl_mbox_inject_poison inject;
236 	struct cxl_poison_record record;
237 	struct cxl_mbox_cmd mbox_cmd;
238 	struct cxl_region *cxlr;
239 	int rc;
240 
241 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
242 		return 0;
243 
244 	rc = down_read_interruptible(&cxl_dpa_rwsem);
245 	if (rc)
246 		return rc;
247 
248 	rc = cxl_validate_poison_dpa(cxlmd, dpa);
249 	if (rc)
250 		goto out;
251 
252 	inject.address = cpu_to_le64(dpa);
253 	mbox_cmd = (struct cxl_mbox_cmd) {
254 		.opcode = CXL_MBOX_OP_INJECT_POISON,
255 		.size_in = sizeof(inject),
256 		.payload_in = &inject,
257 	};
258 	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
259 	if (rc)
260 		goto out;
261 
262 	cxlr = cxl_dpa_to_region(cxlmd, dpa);
263 	if (cxlr)
264 		dev_warn_once(cxlds->dev,
265 			      "poison inject dpa:%#llx region: %s\n", dpa,
266 			      dev_name(&cxlr->dev));
267 
268 	record = (struct cxl_poison_record) {
269 		.address = cpu_to_le64(dpa),
270 		.length = cpu_to_le32(1),
271 	};
272 	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
273 out:
274 	up_read(&cxl_dpa_rwsem);
275 
276 	return rc;
277 }
278 EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);
279 
280 int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
281 {
282 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
283 	struct cxl_mbox_clear_poison clear;
284 	struct cxl_poison_record record;
285 	struct cxl_mbox_cmd mbox_cmd;
286 	struct cxl_region *cxlr;
287 	int rc;
288 
289 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
290 		return 0;
291 
292 	rc = down_read_interruptible(&cxl_dpa_rwsem);
293 	if (rc)
294 		return rc;
295 
296 	rc = cxl_validate_poison_dpa(cxlmd, dpa);
297 	if (rc)
298 		goto out;
299 
300 	/*
301 	 * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
302 	 * is defined to accept 64 bytes of write-data, along with the
303 	 * address to clear. This driver uses zeroes as write-data.
304 	 */
305 	clear = (struct cxl_mbox_clear_poison) {
306 		.address = cpu_to_le64(dpa)
307 	};
308 
309 	mbox_cmd = (struct cxl_mbox_cmd) {
310 		.opcode = CXL_MBOX_OP_CLEAR_POISON,
311 		.size_in = sizeof(clear),
312 		.payload_in = &clear,
313 	};
314 
315 	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
316 	if (rc)
317 		goto out;
318 
319 	cxlr = cxl_dpa_to_region(cxlmd, dpa);
320 	if (cxlr)
321 		dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n",
322 			      dpa, dev_name(&cxlr->dev));
323 
324 	record = (struct cxl_poison_record) {
325 		.address = cpu_to_le64(dpa),
326 		.length = cpu_to_le32(1),
327 	};
328 	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
329 out:
330 	up_read(&cxl_dpa_rwsem);
331 
332 	return rc;
333 }
334 EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL);
335 
/* Attributes of the unnamed memdev attribute group */
static struct attribute *cxl_memdev_attributes[] = {
	&dev_attr_serial.attr,
	&dev_attr_firmware_version.attr,
	&dev_attr_payload_max.attr,
	&dev_attr_label_storage_size.attr,
	&dev_attr_numa_node.attr,
	NULL,
};

/* Attributes of the "pmem" group: the "size" attribute backed by pmem_res */
static struct attribute *cxl_memdev_pmem_attributes[] = {
	&dev_attr_pmem_size.attr,
	NULL,
};

/* Attributes of the "ram" group: the "size" attribute backed by ram_res */
static struct attribute *cxl_memdev_ram_attributes[] = {
	&dev_attr_ram_size.attr,
	NULL,
};
354 
355 static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
356 				  int n)
357 {
358 	if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
359 		return 0;
360 	return a->mode;
361 }
362 
/* Unnamed group; visibility of its attributes is filtered by cxl_memdev_visible() */
static struct attribute_group cxl_memdev_attribute_group = {
	.attrs = cxl_memdev_attributes,
	.is_visible = cxl_memdev_visible,
};

/* Group named "ram" */
static struct attribute_group cxl_memdev_ram_attribute_group = {
	.name = "ram",
	.attrs = cxl_memdev_ram_attributes,
};

/* Group named "pmem" */
static struct attribute_group cxl_memdev_pmem_attribute_group = {
	.name = "pmem",
	.attrs = cxl_memdev_pmem_attributes,
};

/* NULL-terminated list of all memdev attribute groups */
static const struct attribute_group *cxl_memdev_attribute_groups[] = {
	&cxl_memdev_attribute_group,
	&cxl_memdev_ram_attribute_group,
	&cxl_memdev_pmem_attribute_group,
	NULL,
};

/*
 * Device type assigned to every memdev in cxl_memdev_alloc(); also the
 * identity that is_cxl_memdev() compares against.
 */
static const struct device_type cxl_memdev_type = {
	.name = "cxl_memdev",
	.release = cxl_memdev_release,
	.devnode = cxl_memdev_devnode,
	.groups = cxl_memdev_attribute_groups,
};
391 
392 bool is_cxl_memdev(const struct device *dev)
393 {
394 	return dev->type == &cxl_memdev_type;
395 }
396 EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);
397 
398 /**
399  * set_exclusive_cxl_commands() - atomically disable user cxl commands
400  * @cxlds: The device state to operate on
401  * @cmds: bitmap of commands to mark exclusive
402  *
403  * Grab the cxl_memdev_rwsem in write mode to flush in-flight
404  * invocations of the ioctl path and then disable future execution of
405  * commands with the command ids set in @cmds.
406  */
407 void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
408 {
409 	down_write(&cxl_memdev_rwsem);
410 	bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
411 		  CXL_MEM_COMMAND_ID_MAX);
412 	up_write(&cxl_memdev_rwsem);
413 }
414 EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);
415 
416 /**
417  * clear_exclusive_cxl_commands() - atomically enable user cxl commands
418  * @cxlds: The device state to modify
419  * @cmds: bitmap of commands to mark available for userspace
420  */
421 void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
422 {
423 	down_write(&cxl_memdev_rwsem);
424 	bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
425 		      CXL_MEM_COMMAND_ID_MAX);
426 	up_write(&cxl_memdev_rwsem);
427 }
428 EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);
429 
430 static void cxl_memdev_shutdown(struct device *dev)
431 {
432 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
433 
434 	down_write(&cxl_memdev_rwsem);
435 	cxlmd->cxlds = NULL;
436 	up_write(&cxl_memdev_rwsem);
437 }
438 
439 static void cxl_memdev_unregister(void *_cxlmd)
440 {
441 	struct cxl_memdev *cxlmd = _cxlmd;
442 	struct device *dev = &cxlmd->dev;
443 
444 	cxl_memdev_shutdown(dev);
445 	cdev_device_del(&cxlmd->cdev, dev);
446 	put_device(dev);
447 }
448 
449 static void detach_memdev(struct work_struct *work)
450 {
451 	struct cxl_memdev *cxlmd;
452 
453 	cxlmd = container_of(work, typeof(*cxlmd), detach_work);
454 	device_release_driver(&cxlmd->dev);
455 	put_device(&cxlmd->dev);
456 }
457 
/* lockdep class applied to each memdev's device mutex in cxl_memdev_alloc() */
static struct lock_class_key cxl_memdev_key;
459 
460 static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
461 					   const struct file_operations *fops)
462 {
463 	struct cxl_memdev *cxlmd;
464 	struct device *dev;
465 	struct cdev *cdev;
466 	int rc;
467 
468 	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
469 	if (!cxlmd)
470 		return ERR_PTR(-ENOMEM);
471 
472 	rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
473 	if (rc < 0)
474 		goto err;
475 	cxlmd->id = rc;
476 	cxlmd->depth = -1;
477 
478 	dev = &cxlmd->dev;
479 	device_initialize(dev);
480 	lockdep_set_class(&dev->mutex, &cxl_memdev_key);
481 	dev->parent = cxlds->dev;
482 	dev->bus = &cxl_bus_type;
483 	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
484 	dev->type = &cxl_memdev_type;
485 	device_set_pm_not_required(dev);
486 	INIT_WORK(&cxlmd->detach_work, detach_memdev);
487 
488 	cdev = &cxlmd->cdev;
489 	cdev_init(cdev, fops);
490 	return cxlmd;
491 
492 err:
493 	kfree(cxlmd);
494 	return ERR_PTR(rc);
495 }
496 
497 static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
498 			       unsigned long arg)
499 {
500 	switch (cmd) {
501 	case CXL_MEM_QUERY_COMMANDS:
502 		return cxl_query_cmd(cxlmd, (void __user *)arg);
503 	case CXL_MEM_SEND_COMMAND:
504 		return cxl_send_cmd(cxlmd, (void __user *)arg);
505 	default:
506 		return -ENOTTY;
507 	}
508 }
509 
510 static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
511 			     unsigned long arg)
512 {
513 	struct cxl_memdev *cxlmd = file->private_data;
514 	int rc = -ENXIO;
515 
516 	down_read(&cxl_memdev_rwsem);
517 	if (cxlmd->cxlds)
518 		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
519 	up_read(&cxl_memdev_rwsem);
520 
521 	return rc;
522 }
523 
524 static int cxl_memdev_open(struct inode *inode, struct file *file)
525 {
526 	struct cxl_memdev *cxlmd =
527 		container_of(inode->i_cdev, typeof(*cxlmd), cdev);
528 
529 	get_device(&cxlmd->dev);
530 	file->private_data = cxlmd;
531 
532 	return 0;
533 }
534 
535 static int cxl_memdev_release_file(struct inode *inode, struct file *file)
536 {
537 	struct cxl_memdev *cxlmd =
538 		container_of(inode->i_cdev, typeof(*cxlmd), cdev);
539 
540 	put_device(&cxlmd->dev);
541 
542 	return 0;
543 }
544 
/* File operations installed on every memdev cdev by devm_cxl_add_memdev() */
static const struct file_operations cxl_memdev_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = cxl_memdev_ioctl,
	.open = cxl_memdev_open,
	.release = cxl_memdev_release_file,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};
553 
554 struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
555 {
556 	struct cxl_memdev *cxlmd;
557 	struct device *dev;
558 	struct cdev *cdev;
559 	int rc;
560 
561 	cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
562 	if (IS_ERR(cxlmd))
563 		return cxlmd;
564 
565 	dev = &cxlmd->dev;
566 	rc = dev_set_name(dev, "mem%d", cxlmd->id);
567 	if (rc)
568 		goto err;
569 
570 	/*
571 	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
572 	 * needed as this is ordered with cdev_add() publishing the device.
573 	 */
574 	cxlmd->cxlds = cxlds;
575 	cxlds->cxlmd = cxlmd;
576 
577 	cdev = &cxlmd->cdev;
578 	rc = cdev_device_add(cdev, dev);
579 	if (rc)
580 		goto err;
581 
582 	rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
583 	if (rc)
584 		return ERR_PTR(rc);
585 	return cxlmd;
586 
587 err:
588 	/*
589 	 * The cdev was briefly live, shutdown any ioctl operations that
590 	 * saw that state.
591 	 */
592 	cxl_memdev_shutdown(dev);
593 	put_device(dev);
594 	return ERR_PTR(rc);
595 }
596 EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
597 
598 __init int cxl_memdev_init(void)
599 {
600 	dev_t devt;
601 	int rc;
602 
603 	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
604 	if (rc)
605 		return rc;
606 
607 	cxl_mem_major = MAJOR(devt);
608 
609 	return 0;
610 }
611 
612 void cxl_memdev_exit(void)
613 {
614 	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
615 }
616