// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/blkdev.h>
#include <linux/dax.h>
#include <linux/vmalloc.h>
#include <linux/parser.h>

#include "cache_dev.h"
#include "backing_dev.h"
#include "cache.h"
#include "dm_pcache.h"

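/*
 * cache_dev_dax_exit - tear down the cache device mapping
 *
 * Only a vmap()-based mapping needs an explicit vunmap(); a direct
 * dax_direct_access() mapping has nothing to undo here.
 */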
static void cache_dev_dax_exit(struct pcache_cache_dev *cache_dev)
{
	if (cache_dev->use_vmap)
		vunmap(cache_dev->mapping);
}

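/*
 * build_vmap - map a non-contiguous DAX range into one virtual range
 *
 * Walk the device with dax_direct_access() chunk by chunk, collect the
 * struct page pointers behind each returned pfn, and vmap() them into a
 * single virtually contiguous mapping. Used as a fallback when the whole
 * device cannot be direct-mapped with one call.
 */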
static int build_vmap(struct dax_device *dax_dev, long total_pages, void **vaddr)
{
	struct page **pages;
	long i = 0, chunk;
	unsigned long pfn;
	int ret;

	pages = vmalloc_array(total_pages, sizeof(struct page *));
	if (!pages)
		return -ENOMEM;

	do {
		chunk = dax_direct_access(dax_dev, i, total_pages - i,
					  DAX_ACCESS, NULL, &pfn);
		if (chunk <= 0) {
			ret = chunk ? chunk : -EINVAL;
			goto out_free;
		}

		if (!pfn_valid(pfn)) {
			ret = -EOPNOTSUPP;
			goto out_free;
		}

		while (chunk-- && i < total_pages) {
			pages[i++] = pfn_to_page(pfn);
			pfn++;
			if (!(i & 15))
				cond_resched();
		}
	} while (i < total_pages);

	*vaddr = vmap(pages, total_pages, VM_MAP, PAGE_KERNEL);
	if (!*vaddr) {
		ret = -ENOMEM;
		goto out_free;
	}

	ret = 0;

out_free:
	vfree(pages);
	return ret;
}

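/*
 * cache_dev_dax_init - establish the DAX mapping for the cache device
 *
 * Check that the device is at least PCACHE_CACHE_DEV_SIZE_MIN bytes,
 * then try to direct-map the whole range with a single
 * dax_direct_access() call. If only part of the range is returned,
 * fall back to build_vmap() and record that in use_vmap so the
 * mapping is vunmap()ed on exit.
 */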
static int cache_dev_dax_init(struct pcache_cache_dev *cache_dev)
{
	struct dm_pcache	*pcache = CACHE_DEV_TO_PCACHE(cache_dev);
	struct dax_device	*dax_dev;
	long			total_pages, mapped_pages;
	u64			bdev_size;
	void			*vaddr;
	int			ret;
	int			id;
	unsigned long		pfn;

	dax_dev	= cache_dev->dm_dev->dax_dev;
	/* total size check */
	bdev_size = bdev_nr_bytes(cache_dev->dm_dev->bdev);
	if (bdev_size < PCACHE_CACHE_DEV_SIZE_MIN) {
		pcache_dev_err(pcache, "dax device is too small, at least %llu bytes required",
				PCACHE_CACHE_DEV_SIZE_MIN);
		ret = -ENOSPC;
		goto out;
	}

	total_pages = bdev_size >> PAGE_SHIFT;
	/* attempt: direct-map the whole range */
	id = dax_read_lock();
	mapped_pages = dax_direct_access(dax_dev, 0, total_pages,
					 DAX_ACCESS, &vaddr, &pfn);
	if (mapped_pages < 0) {
		pcache_dev_err(pcache, "dax_direct_access failed: %ld\n", mapped_pages);
		ret = mapped_pages;
		goto unlock;
	}

	if (!pfn_valid(pfn)) {
		ret = -EOPNOTSUPP;
		goto unlock;
	}

	if (mapped_pages == total_pages) {
		/* success: contiguous direct mapping */
		cache_dev->mapping = vaddr;
	} else {
		/* need vmap fallback */
		ret = build_vmap(dax_dev, total_pages, &vaddr);
		if (ret) {
			pcache_dev_err(pcache, "vmap fallback failed: %d\n", ret);
			goto unlock;
		}

		cache_dev->mapping	= vaddr;
		cache_dev->use_vmap	= true;
	}
	dax_read_unlock(id);

	return 0;
unlock:
	dax_read_unlock(id);
out:
	return ret;
}

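/*
 * cache_dev_zero_range - zero a range of the mapped cache device
 *
 * Zero the range through the DAX mapping and flush it so the zeroes
 * reach persistent media.
 */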
void cache_dev_zero_range(struct pcache_cache_dev *cache_dev, void *pos, u32 size)
{
	memset(pos, 0, size);
	dax_flush(cache_dev->dm_dev->dax_dev, pos, size);
}

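/*
 * sb_read - copy the on-media superblock into a kernel buffer
 *
 * Use copy_mc_to_kernel() so a machine check on poisoned persistent
 * memory is reported as -EIO instead of crashing the kernel.
 */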
static int sb_read(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
{
	struct pcache_sb *sb_addr = CACHE_DEV_SB(cache_dev);

	if (copy_mc_to_kernel(sb, sb_addr, sizeof(struct pcache_sb)))
		return -EIO;

	return 0;
}

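/*
 * sb_write - persist a superblock to the cache device
 *
 * memcpy_flushcache() writes the data past the CPU caches and
 * pmem_wmb() orders the flushes so the superblock is durable on
 * return.
 */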
static void sb_write(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
{
	struct pcache_sb *sb_addr = CACHE_DEV_SB(cache_dev);

	memcpy_flushcache(sb_addr, sb, sizeof(struct pcache_sb));
	pmem_wmb();
}

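/*
 * sb_init - format a new superblock in sb
 *
 * Only runs on an unformatted device (magic still zero). Compute the
 * number of segments that fit after PCACHE_SEGMENTS_OFF, record the
 * host endianness in the flags, seal everything after the crc field
 * with crc32c, and zero the cache_info/cache_ctrl area on media. The
 * superblock itself is written back later by the caller.
 */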
static int sb_init(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
{
	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
	u64 nr_segs;
	u64 cache_dev_size;
	u64 magic;
	u32 flags = 0;

	magic = le64_to_cpu(sb->magic);
	if (magic)
		return -EEXIST;

	cache_dev_size = bdev_nr_bytes(file_bdev(cache_dev->dm_dev->bdev_file));
	if (cache_dev_size < PCACHE_CACHE_DEV_SIZE_MIN) {
		pcache_dev_err(pcache, "dax device is too small, at least %llu bytes required",
				PCACHE_CACHE_DEV_SIZE_MIN);
		return -ENOSPC;
	}

	nr_segs = (cache_dev_size - PCACHE_SEGMENTS_OFF) / PCACHE_SEG_SIZE;

#if defined(__BYTE_ORDER) ? (__BIG_ENDIAN == __BYTE_ORDER) : defined(__BIG_ENDIAN)
	flags |= PCACHE_SB_F_BIGENDIAN;
#endif
	sb->flags = cpu_to_le32(flags);
	sb->magic = cpu_to_le64(PCACHE_MAGIC);
	sb->seg_num = cpu_to_le32(nr_segs);
	sb->crc = cpu_to_le32(crc32c(PCACHE_CRC_SEED, (void *)(sb) + 4, sizeof(struct pcache_sb) - 4));

	cache_dev_zero_range(cache_dev, CACHE_DEV_CACHE_INFO(cache_dev),
			     PCACHE_CACHE_INFO_SIZE * PCACHE_META_INDEX_MAX +
			     PCACHE_CACHE_CTRL_SIZE);

	return 0;
}

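/*
 * sb_validate - sanity-check a superblock read from the cache device
 *
 * Verify the magic, the crc32c over everything after the crc field,
 * and that the recorded endianness matches the running host.
 */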
static int sb_validate(struct pcache_cache_dev *cache_dev, struct pcache_sb *sb)
{
	struct dm_pcache *pcache = CACHE_DEV_TO_PCACHE(cache_dev);
	u32 flags;
	u32 crc;

	if (le64_to_cpu(sb->magic) != PCACHE_MAGIC) {
		pcache_dev_err(pcache, "unexpected magic: %llx\n",
				le64_to_cpu(sb->magic));
		return -EINVAL;
	}

	crc = crc32c(PCACHE_CRC_SEED, (void *)(sb) + 4, sizeof(struct pcache_sb) - 4);
	if (crc != le32_to_cpu(sb->crc)) {
		pcache_dev_err(pcache, "corrupted sb: %u, expected: %u\n", crc, le32_to_cpu(sb->crc));
		return -EINVAL;
	}

	flags = le32_to_cpu(sb->flags);
#if defined(__BYTE_ORDER) ? (__BIG_ENDIAN == __BYTE_ORDER) : defined(__BIG_ENDIAN)
	if (!(flags & PCACHE_SB_F_BIGENDIAN)) {
		pcache_dev_err(pcache, "cache_dev is not big endian\n");
		return -EINVAL;
	}
#else
	if (flags & PCACHE_SB_F_BIGENDIAN) {
		pcache_dev_err(pcache, "cache_dev is big endian\n");
		return -EINVAL;
	}
#endif
	return 0;
}

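/*
 * cache_dev_init - set up runtime state for segment allocation
 *
 * Allocate the bitmap that tracks which of the seg_num segments are
 * in use.
 */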
static int cache_dev_init(struct pcache_cache_dev *cache_dev, u32 seg_num)
{
	cache_dev->seg_num = seg_num;
	cache_dev->seg_bitmap = kvcalloc(BITS_TO_LONGS(cache_dev->seg_num), sizeof(unsigned long), GFP_KERNEL);
	if (!cache_dev->seg_bitmap)
		return -ENOMEM;

	return 0;
}

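/* Free the segment allocation bitmap. */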
static void cache_dev_exit(struct pcache_cache_dev *cache_dev)
{
	kvfree(cache_dev->seg_bitmap);
}

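/*
 * cache_dev_stop - release cache device resources
 *
 * Undo cache_dev_start(): free the segment bitmap and drop the DAX
 * mapping.
 */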
void cache_dev_stop(struct dm_pcache *pcache)
{
	struct pcache_cache_dev *cache_dev = &pcache->cache_dev;

	cache_dev_exit(cache_dev);
	cache_dev_dax_exit(cache_dev);
}

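/*
 * cache_dev_start - bring up the cache device
 *
 * Map the device via DAX, read the superblock, format one first if
 * the device is still blank, validate it, set up the segment bitmap
 * from the recorded segment count, and finally persist the freshly
 * formatted superblock if this was a first-time format.
 */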
int cache_dev_start(struct dm_pcache *pcache)
{
	struct pcache_cache_dev *cache_dev = &pcache->cache_dev;
	struct pcache_sb sb;
	bool format = false;
	int ret;

	mutex_init(&cache_dev->seg_lock);

	ret = cache_dev_dax_init(cache_dev);
	if (ret) {
		pcache_dev_err(pcache, "failed to init cache_dev %s via dax: %d.",
			       cache_dev->dm_dev->name, ret);
		goto err;
	}

	ret = sb_read(cache_dev, &sb);
	if (ret)
		goto dax_release;

	if (le64_to_cpu(sb.magic) == 0) {
		format = true;
		ret = sb_init(cache_dev, &sb);
		if (ret < 0)
			goto dax_release;
	}

	ret = sb_validate(cache_dev, &sb);
	if (ret)
		goto dax_release;

	cache_dev->sb_flags = le32_to_cpu(sb.flags);
	ret = cache_dev_init(cache_dev, le32_to_cpu(sb.seg_num));
	if (ret)
		goto dax_release;

	if (format)
		sb_write(cache_dev, &sb);

	return 0;

dax_release:
	cache_dev_dax_exit(cache_dev);
err:
	return ret;
}

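/*
 * cache_dev_get_empty_segment_id - allocate an unused segment
 *
 * Find the first clear bit in the segment bitmap under seg_lock, mark
 * it used and return its index in *seg_id, or -ENOSPC if every
 * segment is already allocated.
 */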
int cache_dev_get_empty_segment_id(struct pcache_cache_dev *cache_dev, u32 *seg_id)
{
	int ret;

	mutex_lock(&cache_dev->seg_lock);
	*seg_id = find_next_zero_bit(cache_dev->seg_bitmap, cache_dev->seg_num, 0);
	if (*seg_id == cache_dev->seg_num) {
		ret = -ENOSPC;
		goto unlock;
	}

	__set_bit(*seg_id, cache_dev->seg_bitmap);
	ret = 0;
unlock:
	mutex_unlock(&cache_dev->seg_lock);
	return ret;
}