xref: /linux/drivers/cxl/pmem.c (revision c2aa3089ad7e7fec3ec4a58d8d0904b5e9b392a1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
3 #include <linux/libnvdimm.h>
4 #include <linux/unaligned.h>
5 #include <linux/device.h>
6 #include <linux/module.h>
7 #include <linux/ndctl.h>
8 #include <linux/async.h>
9 #include <linux/slab.h>
10 #include <linux/nd.h>
11 #include "cxlmem.h"
12 #include "cxl.h"
13 
/*
 * Bitmap of CXL memory-device command ids that this driver claims as
 * exclusive. The bits are populated once in cxl_pmem_init() and applied to /
 * removed from a memdev by cxl_nvdimm_probe() / clear_exclusive().
 */
static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
15 
16 static void clear_exclusive(void *mds)
17 {
18 	clear_exclusive_cxl_commands(mds, exclusive_cmds);
19 }
20 
/*
 * devm action registered by cxl_nvdimm_probe(): delete the nvdimm object
 * that the probe created with __nvdimm_create().
 */
static void unregister_nvdimm(void *nvdimm)
{
	struct nvdimm *victim = nvdimm;

	nvdimm_delete(victim);
}
25 
26 static ssize_t provider_show(struct device *dev, struct device_attribute *attr, char *buf)
27 {
28 	struct nvdimm *nvdimm = to_nvdimm(dev);
29 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
30 
31 	return sysfs_emit(buf, "%s\n", dev_name(&cxl_nvd->dev));
32 }
33 static DEVICE_ATTR_RO(provider);
34 
35 static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf)
36 {
37 	struct nvdimm *nvdimm = to_nvdimm(dev);
38 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
39 	struct cxl_dev_state *cxlds = cxl_nvd->cxlmd->cxlds;
40 
41 	return sysfs_emit(buf, "%lld\n", cxlds->serial);
42 }
43 static DEVICE_ATTR_RO(id);
44 
45 static ssize_t dirty_shutdown_show(struct device *dev,
46 				   struct device_attribute *attr, char *buf)
47 {
48 	struct nvdimm *nvdimm = to_nvdimm(dev);
49 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
50 
51 	return sysfs_emit(buf, "%llu\n", cxl_nvd->dirty_shutdowns);
52 }
53 static DEVICE_ATTR_RO(dirty_shutdown);
54 
/*
 * Attributes published in the "cxl" attribute group of each nvdimm.
 * Visibility of dirty_shutdown is decided per device by cxl_dimm_visible().
 */
static struct attribute *cxl_dimm_attributes[] = {
	&dev_attr_id.attr,
	&dev_attr_provider.attr,
	&dev_attr_dirty_shutdown.attr,
	NULL
};
61 
62 #define CXL_INVALID_DIRTY_SHUTDOWN_COUNT ULLONG_MAX
63 static umode_t cxl_dimm_visible(struct kobject *kobj,
64 				struct attribute *a, int n)
65 {
66 	if (a == &dev_attr_dirty_shutdown.attr) {
67 		struct device *dev = kobj_to_dev(kobj);
68 		struct nvdimm *nvdimm = to_nvdimm(dev);
69 		struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
70 
71 		if (cxl_nvd->dirty_shutdowns ==
72 		    CXL_INVALID_DIRTY_SHUTDOWN_COUNT)
73 			return 0;
74 	}
75 
76 	return a->mode;
77 }
78 
/* Named group "cxl": attributes from cxl_dimm_attributes, filtered by cxl_dimm_visible() */
static const struct attribute_group cxl_dimm_attribute_group = {
	.name = "cxl",
	.attrs = cxl_dimm_attributes,
	.is_visible = cxl_dimm_visible
};
84 
/* NULL-terminated group list handed to __nvdimm_create() in cxl_nvdimm_probe() */
static const struct attribute_group *cxl_dimm_attribute_groups[] = {
	&cxl_dimm_attribute_group,
	NULL
};
89 
90 static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd)
91 {
92 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
93 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
94 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
95 	struct device *dev = &cxl_nvd->dev;
96 	u32 count;
97 
98 	/*
99 	 * Dirty tracking is enabled and exposed to the user, only when:
100 	 *   - dirty shutdown on the device can be set, and,
101 	 *   - the device has a Device GPF DVSEC (albeit unused), and,
102 	 *   - the Get Health Info cmd can retrieve the device's dirty count.
103 	 */
104 	cxl_nvd->dirty_shutdowns = CXL_INVALID_DIRTY_SHUTDOWN_COUNT;
105 
106 	if (cxl_arm_dirty_shutdown(mds)) {
107 		dev_warn(dev, "GPF: could not set dirty shutdown state\n");
108 		return;
109 	}
110 
111 	if (!cxl_gpf_get_dvsec(cxlds->dev))
112 		return;
113 
114 	if (cxl_get_dirty_count(mds, &count)) {
115 		dev_warn(dev, "GPF: could not retrieve dirty count\n");
116 		return;
117 	}
118 
119 	cxl_nvd->dirty_shutdowns = count;
120 }
121 
122 static int cxl_nvdimm_probe(struct device *dev)
123 {
124 	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
125 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
126 	struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
127 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
128 	unsigned long flags = 0, cmd_mask = 0;
129 	struct nvdimm *nvdimm;
130 	int rc;
131 
132 	set_exclusive_cxl_commands(mds, exclusive_cmds);
133 	rc = devm_add_action_or_reset(dev, clear_exclusive, mds);
134 	if (rc)
135 		return rc;
136 
137 	set_bit(NDD_LABELING, &flags);
138 	set_bit(NDD_REGISTER_SYNC, &flags);
139 	set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
140 	set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
141 	set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
142 
143 	/*
144 	 * Set dirty shutdown now, with the expectation that the device
145 	 * clear it upon a successful GPF flow. The exception to this
146 	 * is upon Viral detection, per CXL 3.2 section 12.4.2.
147 	 */
148 	cxl_nvdimm_arm_dirty_shutdown_tracking(cxl_nvd);
149 
150 	nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd,
151 				 cxl_dimm_attribute_groups, flags,
152 				 cmd_mask, 0, NULL, cxl_nvd->dev_id,
153 				 cxl_security_ops, NULL);
154 	if (!nvdimm)
155 		return -ENOMEM;
156 
157 	dev_set_drvdata(dev, nvdimm);
158 	return devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
159 }
160 
/*
 * CXL bus driver for CXL_DEVICE_NVDIMM devices. Bind/unbind attributes are
 * suppressed via suppress_bind_attrs.
 */
static struct cxl_driver cxl_nvdimm_driver = {
	.name = "cxl_nvdimm",
	.probe = cxl_nvdimm_probe,
	.id = CXL_DEVICE_NVDIMM,
	.drv = {
		.suppress_bind_attrs = true,
	},
};
169 
170 static int cxl_pmem_get_config_size(struct cxl_memdev_state *mds,
171 				    struct nd_cmd_get_config_size *cmd,
172 				    unsigned int buf_len)
173 {
174 	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
175 
176 	if (sizeof(*cmd) > buf_len)
177 		return -EINVAL;
178 
179 	*cmd = (struct nd_cmd_get_config_size){
180 		.config_size = mds->lsa_size,
181 		.max_xfer =
182 			cxl_mbox->payload_size - sizeof(struct cxl_mbox_set_lsa),
183 	};
184 
185 	return 0;
186 }
187 
188 static int cxl_pmem_get_config_data(struct cxl_memdev_state *mds,
189 				    struct nd_cmd_get_config_data_hdr *cmd,
190 				    unsigned int buf_len)
191 {
192 	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
193 	struct cxl_mbox_get_lsa get_lsa;
194 	struct cxl_mbox_cmd mbox_cmd;
195 	int rc;
196 
197 	if (sizeof(*cmd) > buf_len)
198 		return -EINVAL;
199 	if (struct_size(cmd, out_buf, cmd->in_length) > buf_len)
200 		return -EINVAL;
201 
202 	get_lsa = (struct cxl_mbox_get_lsa) {
203 		.offset = cpu_to_le32(cmd->in_offset),
204 		.length = cpu_to_le32(cmd->in_length),
205 	};
206 	mbox_cmd = (struct cxl_mbox_cmd) {
207 		.opcode = CXL_MBOX_OP_GET_LSA,
208 		.payload_in = &get_lsa,
209 		.size_in = sizeof(get_lsa),
210 		.size_out = cmd->in_length,
211 		.payload_out = cmd->out_buf,
212 	};
213 
214 	rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
215 	cmd->status = 0;
216 
217 	return rc;
218 }
219 
220 static int cxl_pmem_set_config_data(struct cxl_memdev_state *mds,
221 				    struct nd_cmd_set_config_hdr *cmd,
222 				    unsigned int buf_len)
223 {
224 	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
225 	struct cxl_mbox_set_lsa *set_lsa;
226 	struct cxl_mbox_cmd mbox_cmd;
227 	int rc;
228 
229 	if (sizeof(*cmd) > buf_len)
230 		return -EINVAL;
231 
232 	/* 4-byte status follows the input data in the payload */
233 	if (size_add(struct_size(cmd, in_buf, cmd->in_length), 4) > buf_len)
234 		return -EINVAL;
235 
236 	set_lsa =
237 		kvzalloc(struct_size(set_lsa, data, cmd->in_length), GFP_KERNEL);
238 	if (!set_lsa)
239 		return -ENOMEM;
240 
241 	*set_lsa = (struct cxl_mbox_set_lsa) {
242 		.offset = cpu_to_le32(cmd->in_offset),
243 	};
244 	memcpy(set_lsa->data, cmd->in_buf, cmd->in_length);
245 	mbox_cmd = (struct cxl_mbox_cmd) {
246 		.opcode = CXL_MBOX_OP_SET_LSA,
247 		.payload_in = set_lsa,
248 		.size_in = struct_size(set_lsa, data, cmd->in_length),
249 	};
250 
251 	rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
252 
253 	/*
254 	 * Set "firmware" status (4-packed bytes at the end of the input
255 	 * payload.
256 	 */
257 	put_unaligned(0, (u32 *) &cmd->in_buf[cmd->in_length]);
258 	kvfree(set_lsa);
259 
260 	return rc;
261 }
262 
263 static int cxl_pmem_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd,
264 			       void *buf, unsigned int buf_len)
265 {
266 	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
267 	unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
268 	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
269 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
270 
271 	if (!test_bit(cmd, &cmd_mask))
272 		return -ENOTTY;
273 
274 	switch (cmd) {
275 	case ND_CMD_GET_CONFIG_SIZE:
276 		return cxl_pmem_get_config_size(mds, buf, buf_len);
277 	case ND_CMD_GET_CONFIG_DATA:
278 		return cxl_pmem_get_config_data(mds, buf, buf_len);
279 	case ND_CMD_SET_CONFIG_DATA:
280 		return cxl_pmem_set_config_data(mds, buf, buf_len);
281 	default:
282 		return -ENOTTY;
283 	}
284 }
285 
286 static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
287 			struct nvdimm *nvdimm, unsigned int cmd, void *buf,
288 			unsigned int buf_len, int *cmd_rc)
289 {
290 	/*
291 	 * No firmware response to translate, let the transport error
292 	 * code take precedence.
293 	 */
294 	*cmd_rc = 0;
295 
296 	if (!nvdimm)
297 		return -ENOTTY;
298 	return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
299 }
300 
301 static int detach_nvdimm(struct device *dev, void *data)
302 {
303 	struct cxl_nvdimm *cxl_nvd;
304 	bool release = false;
305 
306 	if (!is_cxl_nvdimm(dev))
307 		return 0;
308 
309 	scoped_guard(device, dev) {
310 		if (dev->driver) {
311 			cxl_nvd = to_cxl_nvdimm(dev);
312 			if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data)
313 				release = true;
314 		}
315 	}
316 	if (release)
317 		device_release_driver(dev);
318 	return 0;
319 }
320 
321 static void unregister_nvdimm_bus(void *_cxl_nvb)
322 {
323 	struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
324 	struct nvdimm_bus *nvdimm_bus = cxl_nvb->nvdimm_bus;
325 
326 	bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb, detach_nvdimm);
327 
328 	cxl_nvb->nvdimm_bus = NULL;
329 	nvdimm_bus_unregister(nvdimm_bus);
330 }
331 
332 static int cxl_nvdimm_bridge_probe(struct device *dev)
333 {
334 	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
335 
336 	cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) {
337 		.provider_name = "CXL",
338 		.module = THIS_MODULE,
339 		.ndctl = cxl_pmem_ctl,
340 	};
341 
342 	cxl_nvb->nvdimm_bus =
343 		nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc);
344 
345 	if (!cxl_nvb->nvdimm_bus)
346 		return -ENOMEM;
347 
348 	return devm_add_action_or_reset(dev, unregister_nvdimm_bus, cxl_nvb);
349 }
350 
/*
 * CXL bus driver for CXL_DEVICE_NVDIMM_BRIDGE devices. Bind/unbind
 * attributes are suppressed via suppress_bind_attrs.
 */
static struct cxl_driver cxl_nvdimm_bridge_driver = {
	.name = "cxl_nvdimm_bridge",
	.probe = cxl_nvdimm_bridge_probe,
	.id = CXL_DEVICE_NVDIMM_BRIDGE,
	.drv = {
		.suppress_bind_attrs = true,
	},
};
359 
/*
 * devm action registered by cxl_pmem_region_probe(): delete the nvdimm
 * region created for the CXL pmem region.
 */
static void unregister_nvdimm_region(void *nd_region)
{
	struct nd_region *region = nd_region;

	nvdimm_region_delete(region);
}
364 
/*
 * devm action registered by cxl_pmem_region_probe(): remove the resource
 * that the probe inserted into iomem_resource.
 */
static void cxlr_pmem_remove_resource(void *res)
{
	struct resource *pmem_res = res;

	remove_resource(pmem_res);
}
369 
/*
 * Per-mapping input to the interleave-set cookie. cxl_pmem_region_probe()
 * fills an array of these and feeds its raw bytes to nd_fletcher64(), so the
 * field order and sizes here determine the resulting cookie value.
 */
struct cxl_pmem_region_info {
	u64 offset;	/* start of the mapping (m->start) */
	u64 serial;	/* serial number of the mapping's memdev */
};
374 
375 static int cxl_pmem_region_probe(struct device *dev)
376 {
377 	struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
378 	struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
379 	struct cxl_region *cxlr = cxlr_pmem->cxlr;
380 	struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
381 	struct cxl_pmem_region_info *info = NULL;
382 	struct nd_interleave_set *nd_set;
383 	struct nd_region_desc ndr_desc;
384 	struct cxl_nvdimm *cxl_nvd;
385 	struct nvdimm *nvdimm;
386 	struct resource *res;
387 	int rc, i = 0;
388 
389 	memset(&mappings, 0, sizeof(mappings));
390 	memset(&ndr_desc, 0, sizeof(ndr_desc));
391 
392 	res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
393 	if (!res)
394 		return -ENOMEM;
395 
396 	res->name = "Persistent Memory";
397 	res->start = cxlr_pmem->hpa_range.start;
398 	res->end = cxlr_pmem->hpa_range.end;
399 	res->flags = IORESOURCE_MEM;
400 	res->desc = IORES_DESC_PERSISTENT_MEMORY;
401 
402 	rc = insert_resource(&iomem_resource, res);
403 	if (rc)
404 		return rc;
405 
406 	rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
407 	if (rc)
408 		return rc;
409 
410 	ndr_desc.res = res;
411 	ndr_desc.provider_data = cxlr_pmem;
412 
413 	ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start);
414 	ndr_desc.target_node = phys_to_target_node(res->start);
415 	if (ndr_desc.target_node == NUMA_NO_NODE) {
416 		ndr_desc.target_node = ndr_desc.numa_node;
417 		dev_dbg(&cxlr->dev, "changing target node from %d to %d",
418 			NUMA_NO_NODE, ndr_desc.target_node);
419 	}
420 
421 	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
422 	if (!nd_set)
423 		return -ENOMEM;
424 
425 	ndr_desc.memregion = cxlr->id;
426 	set_bit(ND_REGION_CXL, &ndr_desc.flags);
427 	set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
428 
429 	info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
430 	if (!info)
431 		return -ENOMEM;
432 
433 	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
434 		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
435 		struct cxl_memdev *cxlmd = m->cxlmd;
436 		struct cxl_dev_state *cxlds = cxlmd->cxlds;
437 
438 		cxl_nvd = cxlmd->cxl_nvd;
439 		nvdimm = dev_get_drvdata(&cxl_nvd->dev);
440 		if (!nvdimm) {
441 			dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
442 				dev_name(&cxlmd->dev));
443 			rc = -ENODEV;
444 			goto out_nvd;
445 		}
446 
447 		if (cxlds->serial == 0) {
448 			/* include missing alongside invalid in this error message. */
449 			dev_err(dev, "%s: invalid or missing serial number\n",
450 				dev_name(&cxlmd->dev));
451 			rc = -ENXIO;
452 			goto out_nvd;
453 		}
454 		info[i].serial = cxlds->serial;
455 		info[i].offset = m->start;
456 
457 		m->cxl_nvd = cxl_nvd;
458 		mappings[i] = (struct nd_mapping_desc) {
459 			.nvdimm = nvdimm,
460 			.start = m->start,
461 			.size = m->size,
462 			.position = i,
463 		};
464 	}
465 	ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
466 	ndr_desc.mapping = mappings;
467 
468 	/*
469 	 * TODO enable CXL labels which skip the need for 'interleave-set cookie'
470 	 */
471 	nd_set->cookie1 =
472 		nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0);
473 	nd_set->cookie2 = nd_set->cookie1;
474 	ndr_desc.nd_set = nd_set;
475 
476 	cxlr_pmem->nd_region =
477 		nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
478 	if (!cxlr_pmem->nd_region) {
479 		rc = -ENOMEM;
480 		goto out_nvd;
481 	}
482 
483 	rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
484 				      cxlr_pmem->nd_region);
485 out_nvd:
486 	kfree(info);
487 
488 	return rc;
489 }
490 
/*
 * CXL bus driver for CXL_DEVICE_PMEM_REGION devices. Bind/unbind attributes
 * are suppressed via suppress_bind_attrs.
 */
static struct cxl_driver cxl_pmem_region_driver = {
	.name = "cxl_pmem_region",
	.probe = cxl_pmem_region_probe,
	.id = CXL_DEVICE_PMEM_REGION,
	.drv = {
		.suppress_bind_attrs = true,
	},
};
499 
500 static __init int cxl_pmem_init(void)
501 {
502 	int rc;
503 
504 	set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
505 	set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);
506 
507 	rc = cxl_driver_register(&cxl_nvdimm_bridge_driver);
508 	if (rc)
509 		return rc;
510 
511 	rc = cxl_driver_register(&cxl_nvdimm_driver);
512 	if (rc)
513 		goto err_nvdimm;
514 
515 	rc = cxl_driver_register(&cxl_pmem_region_driver);
516 	if (rc)
517 		goto err_region;
518 
519 	return 0;
520 
521 err_region:
522 	cxl_driver_unregister(&cxl_nvdimm_driver);
523 err_nvdimm:
524 	cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
525 	return rc;
526 }
527 
/* Module exit: unregister the drivers in the reverse of cxl_pmem_init()'s order */
static __exit void cxl_pmem_exit(void)
{
	cxl_driver_unregister(&cxl_pmem_region_driver);
	cxl_driver_unregister(&cxl_nvdimm_driver);
	cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
}
534 
/* Module metadata, entry points, and one module alias per CXL device type handled above */
MODULE_DESCRIPTION("CXL PMEM: Persistent Memory Support");
MODULE_LICENSE("GPL v2");
module_init(cxl_pmem_init);
module_exit(cxl_pmem_exit);
MODULE_IMPORT_NS("CXL");
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM);
MODULE_ALIAS_CXL(CXL_DEVICE_PMEM_REGION);
543