xref: /linux/drivers/nvdimm/pmem.c (revision b40f4757daa1b28e586fddad76638c98e2edfc34)
1 /*
2  * Persistent Memory Driver
3  *
4  * Copyright (c) 2014-2015, Intel Corporation.
5  * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
6  * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  */
17 
18 #include <asm/cacheflush.h>
19 #include <linux/blkdev.h>
20 #include <linux/hdreg.h>
21 #include <linux/init.h>
22 #include <linux/platform_device.h>
23 #include <linux/module.h>
24 #include <linux/moduleparam.h>
25 #include <linux/badblocks.h>
26 #include <linux/memremap.h>
27 #include <linux/vmalloc.h>
28 #include <linux/pfn_t.h>
29 #include <linux/slab.h>
30 #include <linux/pmem.h>
31 #include <linux/nd.h>
32 #include "pfn.h"
33 #include "nd.h"
34 
35 struct pmem_device {
36 	struct request_queue	*pmem_queue;
37 	struct gendisk		*pmem_disk;
38 	struct nd_namespace_common *ndns;
39 
40 	/* One contiguous memory region per device */
41 	phys_addr_t		phys_addr;
42 	/* when non-zero this device is hosting a 'pfn' instance */
43 	phys_addr_t		data_offset;
44 	u64			pfn_flags;
45 	void __pmem		*virt_addr;
46 	/* immutable base size of the namespace */
47 	size_t			size;
48 	/* trim size when namespace capacity has been section aligned */
49 	u32			pfn_pad;
50 	struct badblocks	bb;
51 };
52 
53 static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len)
54 {
55 	if (bb->count) {
56 		sector_t first_bad;
57 		int num_bad;
58 
59 		return !!badblocks_check(bb, sector, len / 512, &first_bad,
60 				&num_bad);
61 	}
62 
63 	return false;
64 }
65 
66 static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
67 		unsigned int len)
68 {
69 	struct device *dev = disk_to_dev(pmem->pmem_disk);
70 	sector_t sector;
71 	long cleared;
72 
73 	sector = (offset - pmem->data_offset) / 512;
74 	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
75 
76 	if (cleared > 0 && cleared / 512) {
77 		dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
78 				__func__, (unsigned long long) sector,
79 				cleared / 512, cleared / 512 > 1 ? "s" : "");
80 		badblocks_clear(&pmem->bb, sector, cleared / 512);
81 	}
82 	invalidate_pmem(pmem->virt_addr + offset, len);
83 }
84 
85 static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
86 			unsigned int len, unsigned int off, int rw,
87 			sector_t sector)
88 {
89 	int rc = 0;
90 	bool bad_pmem = false;
91 	void *mem = kmap_atomic(page);
92 	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
93 	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
94 
95 	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
96 		bad_pmem = true;
97 
98 	if (rw == READ) {
99 		if (unlikely(bad_pmem))
100 			rc = -EIO;
101 		else {
102 			rc = memcpy_from_pmem(mem + off, pmem_addr, len);
103 			flush_dcache_page(page);
104 		}
105 	} else {
106 		/*
107 		 * Note that we write the data both before and after
108 		 * clearing poison.  The write before clear poison
109 		 * handles situations where the latest written data is
110 		 * preserved and the clear poison operation simply marks
111 		 * the address range as valid without changing the data.
112 		 * In this case application software can assume that an
113 		 * interrupted write will either return the new good
114 		 * data or an error.
115 		 *
116 		 * However, if pmem_clear_poison() leaves the data in an
117 		 * indeterminate state we need to perform the write
118 		 * after clear poison.
119 		 */
120 		flush_dcache_page(page);
121 		memcpy_to_pmem(pmem_addr, mem + off, len);
122 		if (unlikely(bad_pmem)) {
123 			pmem_clear_poison(pmem, pmem_off, len);
124 			memcpy_to_pmem(pmem_addr, mem + off, len);
125 		}
126 	}
127 
128 	kunmap_atomic(mem);
129 	return rc;
130 }
131 
132 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
133 {
134 	int rc = 0;
135 	bool do_acct;
136 	unsigned long start;
137 	struct bio_vec bvec;
138 	struct bvec_iter iter;
139 	struct block_device *bdev = bio->bi_bdev;
140 	struct pmem_device *pmem = bdev->bd_disk->private_data;
141 
142 	do_acct = nd_iostat_start(bio, &start);
143 	bio_for_each_segment(bvec, bio, iter) {
144 		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
145 				bvec.bv_offset, bio_data_dir(bio),
146 				iter.bi_sector);
147 		if (rc) {
148 			bio->bi_error = rc;
149 			break;
150 		}
151 	}
152 	if (do_acct)
153 		nd_iostat_end(bio, start);
154 
155 	if (bio_data_dir(bio))
156 		wmb_pmem();
157 
158 	bio_endio(bio);
159 	return BLK_QC_T_NONE;
160 }
161 
162 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
163 		       struct page *page, int rw)
164 {
165 	struct pmem_device *pmem = bdev->bd_disk->private_data;
166 	int rc;
167 
168 	rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
169 	if (rw & WRITE)
170 		wmb_pmem();
171 
172 	/*
173 	 * The ->rw_page interface is subtle and tricky.  The core
174 	 * retries on any error, so we can only invoke page_endio() in
175 	 * the successful completion case.  Otherwise, we'll see crashes
176 	 * caused by double completion.
177 	 */
178 	if (rc == 0)
179 		page_endio(page, rw & WRITE, 0);
180 
181 	return rc;
182 }
183 
184 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
185 		      void __pmem **kaddr, pfn_t *pfn)
186 {
187 	struct pmem_device *pmem = bdev->bd_disk->private_data;
188 	resource_size_t offset = sector * 512 + pmem->data_offset;
189 
190 	*kaddr = pmem->virt_addr + offset;
191 	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
192 
193 	return pmem->size - pmem->pfn_pad - offset;
194 }
195 
196 static const struct block_device_operations pmem_fops = {
197 	.owner =		THIS_MODULE,
198 	.rw_page =		pmem_rw_page,
199 	.direct_access =	pmem_direct_access,
200 	.revalidate_disk =	nvdimm_revalidate_disk,
201 };
202 
203 static struct pmem_device *pmem_alloc(struct device *dev,
204 		struct resource *res, int id)
205 {
206 	struct pmem_device *pmem;
207 	struct request_queue *q;
208 
209 	pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
210 	if (!pmem)
211 		return ERR_PTR(-ENOMEM);
212 
213 	pmem->phys_addr = res->start;
214 	pmem->size = resource_size(res);
215 	if (!arch_has_wmb_pmem())
216 		dev_warn(dev, "unable to guarantee persistence of writes\n");
217 
218 	if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size,
219 			dev_name(dev))) {
220 		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
221 				&pmem->phys_addr, pmem->size);
222 		return ERR_PTR(-EBUSY);
223 	}
224 
225 	q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
226 	if (!q)
227 		return ERR_PTR(-ENOMEM);
228 
229 	pmem->pfn_flags = PFN_DEV;
230 	if (pmem_should_map_pages(dev)) {
231 		pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res,
232 				&q->q_usage_counter, NULL);
233 		pmem->pfn_flags |= PFN_MAP;
234 	} else
235 		pmem->virt_addr = (void __pmem *) devm_memremap(dev,
236 				pmem->phys_addr, pmem->size,
237 				ARCH_MEMREMAP_PMEM);
238 
239 	if (IS_ERR(pmem->virt_addr)) {
240 		blk_cleanup_queue(q);
241 		return (void __force *) pmem->virt_addr;
242 	}
243 
244 	pmem->pmem_queue = q;
245 	return pmem;
246 }
247 
248 static void pmem_detach_disk(struct pmem_device *pmem)
249 {
250 	if (!pmem->pmem_disk)
251 		return;
252 
253 	del_gendisk(pmem->pmem_disk);
254 	put_disk(pmem->pmem_disk);
255 	blk_cleanup_queue(pmem->pmem_queue);
256 }
257 
258 static int pmem_attach_disk(struct device *dev,
259 		struct nd_namespace_common *ndns, struct pmem_device *pmem)
260 {
261 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
262 	int nid = dev_to_node(dev);
263 	struct resource bb_res;
264 	struct gendisk *disk;
265 
266 	blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
267 	blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
268 	blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
269 	blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
270 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
271 
272 	disk = alloc_disk_node(0, nid);
273 	if (!disk) {
274 		blk_cleanup_queue(pmem->pmem_queue);
275 		return -ENOMEM;
276 	}
277 
278 	disk->fops		= &pmem_fops;
279 	disk->private_data	= pmem;
280 	disk->queue		= pmem->pmem_queue;
281 	disk->flags		= GENHD_FL_EXT_DEVT;
282 	nvdimm_namespace_disk_name(ndns, disk->disk_name);
283 	disk->driverfs_dev = dev;
284 	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
285 			/ 512);
286 	pmem->pmem_disk = disk;
287 	devm_exit_badblocks(dev, &pmem->bb);
288 	if (devm_init_badblocks(dev, &pmem->bb))
289 		return -ENOMEM;
290 	bb_res.start = nsio->res.start + pmem->data_offset;
291 	bb_res.end = nsio->res.end;
292 	if (is_nd_pfn(dev)) {
293 		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
294 		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
295 
296 		bb_res.start += __le32_to_cpu(pfn_sb->start_pad);
297 		bb_res.end -= __le32_to_cpu(pfn_sb->end_trunc);
298 	}
299 	nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb,
300 			&bb_res);
301 	disk->bb = &pmem->bb;
302 	add_disk(disk);
303 	revalidate_disk(disk);
304 
305 	return 0;
306 }
307 
308 static int pmem_rw_bytes(struct nd_namespace_common *ndns,
309 		resource_size_t offset, void *buf, size_t size, int rw)
310 {
311 	struct pmem_device *pmem = dev_get_drvdata(ndns->claim);
312 
313 	if (unlikely(offset + size > pmem->size)) {
314 		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
315 		return -EFAULT;
316 	}
317 
318 	if (rw == READ) {
319 		unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
320 
321 		if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align)))
322 			return -EIO;
323 		return memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
324 	} else {
325 		memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
326 		wmb_pmem();
327 	}
328 
329 	return 0;
330 }
331 
332 static int nd_pfn_init(struct nd_pfn *nd_pfn)
333 {
334 	struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
335 	struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev);
336 	struct nd_namespace_common *ndns = nd_pfn->ndns;
337 	u32 start_pad = 0, end_trunc = 0;
338 	resource_size_t start, size;
339 	struct nd_namespace_io *nsio;
340 	struct nd_region *nd_region;
341 	unsigned long npfns;
342 	phys_addr_t offset;
343 	u64 checksum;
344 	int rc;
345 
346 	if (!pfn_sb)
347 		return -ENOMEM;
348 
349 	nd_pfn->pfn_sb = pfn_sb;
350 	rc = nd_pfn_validate(nd_pfn);
351 	if (rc == -ENODEV)
352 		/* no info block, do init */;
353 	else
354 		return rc;
355 
356 	nd_region = to_nd_region(nd_pfn->dev.parent);
357 	if (nd_region->ro) {
358 		dev_info(&nd_pfn->dev,
359 				"%s is read-only, unable to init metadata\n",
360 				dev_name(&nd_region->dev));
361 		goto err;
362 	}
363 
364 	memset(pfn_sb, 0, sizeof(*pfn_sb));
365 
366 	/*
367 	 * Check if pmem collides with 'System RAM' when section aligned and
368 	 * trim it accordingly
369 	 */
370 	nsio = to_nd_namespace_io(&ndns->dev);
371 	start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
372 	size = resource_size(&nsio->res);
373 	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
374 				IORES_DESC_NONE) == REGION_MIXED) {
375 
376 		start = nsio->res.start;
377 		start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
378 	}
379 
380 	start = nsio->res.start;
381 	size = PHYS_SECTION_ALIGN_UP(start + size) - start;
382 	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
383 				IORES_DESC_NONE) == REGION_MIXED) {
384 		size = resource_size(&nsio->res);
385 		end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
386 	}
387 
388 	if (start_pad + end_trunc)
389 		dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
390 				dev_name(&ndns->dev), start_pad + end_trunc);
391 
392 	/*
393 	 * Note, we use 64 here for the standard size of struct page,
394 	 * debugging options may cause it to be larger in which case the
395 	 * implementation will limit the pfns advertised through
396 	 * ->direct_access() to those that are included in the memmap.
397 	 */
398 	start += start_pad;
399 	npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K;
400 	if (nd_pfn->mode == PFN_MODE_PMEM)
401 		offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align)
402 			- start;
403 	else if (nd_pfn->mode == PFN_MODE_RAM)
404 		offset = ALIGN(start + SZ_8K, nd_pfn->align) - start;
405 	else
406 		goto err;
407 
408 	if (offset + start_pad + end_trunc >= pmem->size) {
409 		dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
410 				dev_name(&ndns->dev));
411 		goto err;
412 	}
413 
414 	npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K;
415 	pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
416 	pfn_sb->dataoff = cpu_to_le64(offset);
417 	pfn_sb->npfns = cpu_to_le64(npfns);
418 	memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
419 	memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
420 	memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
421 	pfn_sb->version_major = cpu_to_le16(1);
422 	pfn_sb->version_minor = cpu_to_le16(1);
423 	pfn_sb->start_pad = cpu_to_le32(start_pad);
424 	pfn_sb->end_trunc = cpu_to_le32(end_trunc);
425 	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
426 	pfn_sb->checksum = cpu_to_le64(checksum);
427 
428 	rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
429 	if (rc)
430 		goto err;
431 
432 	return 0;
433  err:
434 	nd_pfn->pfn_sb = NULL;
435 	kfree(pfn_sb);
436 	return -ENXIO;
437 }
438 
439 static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns)
440 {
441 	struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
442 	struct pmem_device *pmem;
443 
444 	/* free pmem disk */
445 	pmem = dev_get_drvdata(&nd_pfn->dev);
446 	pmem_detach_disk(pmem);
447 
448 	/* release nd_pfn resources */
449 	kfree(nd_pfn->pfn_sb);
450 	nd_pfn->pfn_sb = NULL;
451 
452 	return 0;
453 }
454 
455 /*
456  * We hotplug memory at section granularity, pad the reserved area from
457  * the previous section base to the namespace base address.
458  */
459 static unsigned long init_altmap_base(resource_size_t base)
460 {
461 	unsigned long base_pfn = PHYS_PFN(base);
462 
463 	return PFN_SECTION_ALIGN_DOWN(base_pfn);
464 }
465 
466 static unsigned long init_altmap_reserve(resource_size_t base)
467 {
468 	unsigned long reserve = PHYS_PFN(SZ_8K);
469 	unsigned long base_pfn = PHYS_PFN(base);
470 
471 	reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
472 	return reserve;
473 }
474 
475 static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn)
476 {
477 	int rc;
478 	struct resource res;
479 	struct request_queue *q;
480 	struct pmem_device *pmem;
481 	struct vmem_altmap *altmap;
482 	struct device *dev = &nd_pfn->dev;
483 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
484 	struct nd_namespace_common *ndns = nd_pfn->ndns;
485 	u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
486 	u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
487 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
488 	resource_size_t base = nsio->res.start + start_pad;
489 	struct vmem_altmap __altmap = {
490 		.base_pfn = init_altmap_base(base),
491 		.reserve = init_altmap_reserve(base),
492 	};
493 
494 	pmem = dev_get_drvdata(dev);
495 	pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
496 	pmem->pfn_pad = start_pad + end_trunc;
497 	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
498 	if (nd_pfn->mode == PFN_MODE_RAM) {
499 		if (pmem->data_offset < SZ_8K)
500 			return -EINVAL;
501 		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
502 		altmap = NULL;
503 	} else if (nd_pfn->mode == PFN_MODE_PMEM) {
504 		nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset)
505 			/ PAGE_SIZE;
506 		if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
507 			dev_info(&nd_pfn->dev,
508 					"number of pfns truncated from %lld to %ld\n",
509 					le64_to_cpu(nd_pfn->pfn_sb->npfns),
510 					nd_pfn->npfns);
511 		altmap = & __altmap;
512 		altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K);
513 		altmap->alloc = 0;
514 	} else {
515 		rc = -ENXIO;
516 		goto err;
517 	}
518 
519 	/* establish pfn range for lookup, and switch to direct map */
520 	q = pmem->pmem_queue;
521 	memcpy(&res, &nsio->res, sizeof(res));
522 	res.start += start_pad;
523 	res.end -= end_trunc;
524 	devm_memunmap(dev, (void __force *) pmem->virt_addr);
525 	pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res,
526 			&q->q_usage_counter, altmap);
527 	pmem->pfn_flags |= PFN_MAP;
528 	if (IS_ERR(pmem->virt_addr)) {
529 		rc = PTR_ERR(pmem->virt_addr);
530 		goto err;
531 	}
532 
533 	/* attach pmem disk in "pfn-mode" */
534 	rc = pmem_attach_disk(dev, ndns, pmem);
535 	if (rc)
536 		goto err;
537 
538 	return rc;
539  err:
540 	nvdimm_namespace_detach_pfn(ndns);
541 	return rc;
542 
543 }
544 
545 static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
546 {
547 	struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
548 	int rc;
549 
550 	if (!nd_pfn->uuid || !nd_pfn->ndns)
551 		return -ENODEV;
552 
553 	rc = nd_pfn_init(nd_pfn);
554 	if (rc)
555 		return rc;
556 	/* we need a valid pfn_sb before we can init a vmem_altmap */
557 	return __nvdimm_namespace_attach_pfn(nd_pfn);
558 }
559 
560 static int nd_pmem_probe(struct device *dev)
561 {
562 	struct nd_region *nd_region = to_nd_region(dev->parent);
563 	struct nd_namespace_common *ndns;
564 	struct nd_namespace_io *nsio;
565 	struct pmem_device *pmem;
566 
567 	ndns = nvdimm_namespace_common_probe(dev);
568 	if (IS_ERR(ndns))
569 		return PTR_ERR(ndns);
570 
571 	nsio = to_nd_namespace_io(&ndns->dev);
572 	pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
573 	if (IS_ERR(pmem))
574 		return PTR_ERR(pmem);
575 
576 	pmem->ndns = ndns;
577 	dev_set_drvdata(dev, pmem);
578 	ndns->rw_bytes = pmem_rw_bytes;
579 	if (devm_init_badblocks(dev, &pmem->bb))
580 		return -ENOMEM;
581 	nvdimm_badblocks_populate(nd_region, &pmem->bb, &nsio->res);
582 
583 	if (is_nd_btt(dev)) {
584 		/* btt allocates its own request_queue */
585 		blk_cleanup_queue(pmem->pmem_queue);
586 		pmem->pmem_queue = NULL;
587 		return nvdimm_namespace_attach_btt(ndns);
588 	}
589 
590 	if (is_nd_pfn(dev))
591 		return nvdimm_namespace_attach_pfn(ndns);
592 
593 	if (nd_btt_probe(ndns, pmem) == 0 || nd_pfn_probe(ndns, pmem) == 0) {
594 		/*
595 		 * We'll come back as either btt-pmem, or pfn-pmem, so
596 		 * drop the queue allocation for now.
597 		 */
598 		blk_cleanup_queue(pmem->pmem_queue);
599 		return -ENXIO;
600 	}
601 
602 	return pmem_attach_disk(dev, ndns, pmem);
603 }
604 
605 static int nd_pmem_remove(struct device *dev)
606 {
607 	struct pmem_device *pmem = dev_get_drvdata(dev);
608 
609 	if (is_nd_btt(dev))
610 		nvdimm_namespace_detach_btt(pmem->ndns);
611 	else if (is_nd_pfn(dev))
612 		nvdimm_namespace_detach_pfn(pmem->ndns);
613 	else
614 		pmem_detach_disk(pmem);
615 
616 	return 0;
617 }
618 
619 static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
620 {
621 	struct pmem_device *pmem = dev_get_drvdata(dev);
622 	struct nd_namespace_common *ndns = pmem->ndns;
623 	struct nd_region *nd_region = to_nd_region(dev->parent);
624 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
625 	struct resource res = {
626 		.start = nsio->res.start + pmem->data_offset,
627 		.end = nsio->res.end,
628 	};
629 
630 	if (event != NVDIMM_REVALIDATE_POISON)
631 		return;
632 
633 	if (is_nd_pfn(dev)) {
634 		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
635 		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
636 
637 		res.start += __le32_to_cpu(pfn_sb->start_pad);
638 		res.end -= __le32_to_cpu(pfn_sb->end_trunc);
639 	}
640 
641 	nvdimm_badblocks_populate(nd_region, &pmem->bb, &res);
642 }
643 
644 MODULE_ALIAS("pmem");
645 MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
646 MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
647 static struct nd_device_driver nd_pmem_driver = {
648 	.probe = nd_pmem_probe,
649 	.remove = nd_pmem_remove,
650 	.notify = nd_pmem_notify,
651 	.drv = {
652 		.name = "nd_pmem",
653 	},
654 	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
655 };
656 
657 static int __init pmem_init(void)
658 {
659 	return nd_driver_register(&nd_pmem_driver);
660 }
661 module_init(pmem_init);
662 
663 static void pmem_exit(void)
664 {
665 	driver_unregister(&nd_pmem_driver.drv);
666 }
667 module_exit(pmem_exit);
668 
669 MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
670 MODULE_LICENSE("GPL v2");
671