xref: /linux/fs/nfs/blocklayout/dev.c (revision 005438a8eef063495ac059d128eea71b58de50e5)
1 /*
2  * Copyright (c) 2014 Christoph Hellwig.
3  */
4 #include <linux/sunrpc/svc.h>
5 #include <linux/blkdev.h>
6 #include <linux/nfs4.h>
7 #include <linux/nfs_fs.h>
8 #include <linux/nfs_xdr.h>
9 
10 #include "blocklayout.h"
11 
12 #define NFSDBG_FACILITY		NFSDBG_PNFS_LD
13 
14 static void
15 bl_free_device(struct pnfs_block_dev *dev)
16 {
17 	if (dev->nr_children) {
18 		int i;
19 
20 		for (i = 0; i < dev->nr_children; i++)
21 			bl_free_device(&dev->children[i]);
22 		kfree(dev->children);
23 	} else {
24 		if (dev->bdev)
25 			blkdev_put(dev->bdev, FMODE_READ);
26 	}
27 }
28 
29 void
30 bl_free_deviceid_node(struct nfs4_deviceid_node *d)
31 {
32 	struct pnfs_block_dev *dev =
33 		container_of(d, struct pnfs_block_dev, node);
34 
35 	bl_free_device(dev);
36 	kfree_rcu(dev, node.rcu);
37 }
38 
39 static int
40 nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
41 {
42 	__be32 *p;
43 	int i;
44 
45 	p = xdr_inline_decode(xdr, 4);
46 	if (!p)
47 		return -EIO;
48 	b->type = be32_to_cpup(p++);
49 
50 	switch (b->type) {
51 	case PNFS_BLOCK_VOLUME_SIMPLE:
52 		p = xdr_inline_decode(xdr, 4);
53 		if (!p)
54 			return -EIO;
55 		b->simple.nr_sigs = be32_to_cpup(p++);
56 		if (!b->simple.nr_sigs) {
57 			dprintk("no signature\n");
58 			return -EIO;
59 		}
60 
61 		b->simple.len = 4 + 4;
62 		for (i = 0; i < b->simple.nr_sigs; i++) {
63 			p = xdr_inline_decode(xdr, 8 + 4);
64 			if (!p)
65 				return -EIO;
66 			p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
67 			b->simple.sigs[i].sig_len = be32_to_cpup(p++);
68 
69 			p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
70 			if (!p)
71 				return -EIO;
72 			memcpy(&b->simple.sigs[i].sig, p,
73 				b->simple.sigs[i].sig_len);
74 
75 			b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
76 		}
77 		break;
78 	case PNFS_BLOCK_VOLUME_SLICE:
79 		p = xdr_inline_decode(xdr, 8 + 8 + 4);
80 		if (!p)
81 			return -EIO;
82 		p = xdr_decode_hyper(p, &b->slice.start);
83 		p = xdr_decode_hyper(p, &b->slice.len);
84 		b->slice.volume = be32_to_cpup(p++);
85 		break;
86 	case PNFS_BLOCK_VOLUME_CONCAT:
87 		p = xdr_inline_decode(xdr, 4);
88 		if (!p)
89 			return -EIO;
90 		b->concat.volumes_count = be32_to_cpup(p++);
91 
92 		p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
93 		if (!p)
94 			return -EIO;
95 		for (i = 0; i < b->concat.volumes_count; i++)
96 			b->concat.volumes[i] = be32_to_cpup(p++);
97 		break;
98 	case PNFS_BLOCK_VOLUME_STRIPE:
99 		p = xdr_inline_decode(xdr, 8 + 4);
100 		if (!p)
101 			return -EIO;
102 		p = xdr_decode_hyper(p, &b->stripe.chunk_size);
103 		b->stripe.volumes_count = be32_to_cpup(p++);
104 
105 		p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
106 		if (!p)
107 			return -EIO;
108 		for (i = 0; i < b->stripe.volumes_count; i++)
109 			b->stripe.volumes[i] = be32_to_cpup(p++);
110 		break;
111 	default:
112 		dprintk("unknown volume type!\n");
113 		return -EIO;
114 	}
115 
116 	return 0;
117 }
118 
119 static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
120 		struct pnfs_block_dev_map *map)
121 {
122 	map->start = dev->start;
123 	map->len = dev->len;
124 	map->disk_offset = dev->disk_offset;
125 	map->bdev = dev->bdev;
126 	return true;
127 }
128 
129 static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
130 		struct pnfs_block_dev_map *map)
131 {
132 	int i;
133 
134 	for (i = 0; i < dev->nr_children; i++) {
135 		struct pnfs_block_dev *child = &dev->children[i];
136 
137 		if (child->start > offset ||
138 		    child->start + child->len <= offset)
139 			continue;
140 
141 		child->map(child, offset - child->start, map);
142 		return true;
143 	}
144 
145 	dprintk("%s: ran off loop!\n", __func__);
146 	return false;
147 }
148 
149 static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
150 		struct pnfs_block_dev_map *map)
151 {
152 	struct pnfs_block_dev *child;
153 	u64 chunk;
154 	u32 chunk_idx;
155 	u64 disk_offset;
156 
157 	chunk = div_u64(offset, dev->chunk_size);
158 	div_u64_rem(chunk, dev->nr_children, &chunk_idx);
159 
160 	if (chunk_idx > dev->nr_children) {
161 		dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
162 			__func__, chunk_idx, offset, dev->chunk_size);
163 		/* error, should not happen */
164 		return false;
165 	}
166 
167 	/* truncate offset to the beginning of the stripe */
168 	offset = chunk * dev->chunk_size;
169 
170 	/* disk offset of the stripe */
171 	disk_offset = div_u64(offset, dev->nr_children);
172 
173 	child = &dev->children[chunk_idx];
174 	child->map(child, disk_offset, map);
175 
176 	map->start += offset;
177 	map->disk_offset += disk_offset;
178 	map->len = dev->chunk_size;
179 	return true;
180 }
181 
/*
 * Forward declaration: the slice, concat and stripe parsers below call
 * bl_parse_deviceid() for their component volumes, and it in turn
 * dispatches back to them.
 */
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
185 
186 
187 static int
188 bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
189 		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
190 {
191 	struct pnfs_block_volume *v = &volumes[idx];
192 	dev_t dev;
193 
194 	dev = bl_resolve_deviceid(server, v, gfp_mask);
195 	if (!dev)
196 		return -EIO;
197 
198 	d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
199 	if (IS_ERR(d->bdev)) {
200 		printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
201 			MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
202 		return PTR_ERR(d->bdev);
203 	}
204 
205 
206 	d->len = i_size_read(d->bdev->bd_inode);
207 	d->map = bl_map_simple;
208 
209 	printk(KERN_INFO "pNFS: using block device %s\n",
210 		d->bdev->bd_disk->disk_name);
211 	return 0;
212 }
213 
214 static int
215 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
216 		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
217 {
218 	struct pnfs_block_volume *v = &volumes[idx];
219 	int ret;
220 
221 	ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
222 	if (ret)
223 		return ret;
224 
225 	d->disk_offset = v->slice.start;
226 	d->len = v->slice.len;
227 	return 0;
228 }
229 
230 static int
231 bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
232 		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
233 {
234 	struct pnfs_block_volume *v = &volumes[idx];
235 	u64 len = 0;
236 	int ret, i;
237 
238 	d->children = kcalloc(v->concat.volumes_count,
239 			sizeof(struct pnfs_block_dev), GFP_KERNEL);
240 	if (!d->children)
241 		return -ENOMEM;
242 
243 	for (i = 0; i < v->concat.volumes_count; i++) {
244 		ret = bl_parse_deviceid(server, &d->children[i],
245 				volumes, v->concat.volumes[i], gfp_mask);
246 		if (ret)
247 			return ret;
248 
249 		d->nr_children++;
250 		d->children[i].start += len;
251 		len += d->children[i].len;
252 	}
253 
254 	d->len = len;
255 	d->map = bl_map_concat;
256 	return 0;
257 }
258 
259 static int
260 bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
261 		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
262 {
263 	struct pnfs_block_volume *v = &volumes[idx];
264 	u64 len = 0;
265 	int ret, i;
266 
267 	d->children = kcalloc(v->stripe.volumes_count,
268 			sizeof(struct pnfs_block_dev), GFP_KERNEL);
269 	if (!d->children)
270 		return -ENOMEM;
271 
272 	for (i = 0; i < v->stripe.volumes_count; i++) {
273 		ret = bl_parse_deviceid(server, &d->children[i],
274 				volumes, v->stripe.volumes[i], gfp_mask);
275 		if (ret)
276 			return ret;
277 
278 		d->nr_children++;
279 		len += d->children[i].len;
280 	}
281 
282 	d->len = len;
283 	d->chunk_size = v->stripe.chunk_size;
284 	d->map = bl_map_stripe;
285 	return 0;
286 }
287 
288 static int
289 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
290 		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
291 {
292 	switch (volumes[idx].type) {
293 	case PNFS_BLOCK_VOLUME_SIMPLE:
294 		return bl_parse_simple(server, d, volumes, idx, gfp_mask);
295 	case PNFS_BLOCK_VOLUME_SLICE:
296 		return bl_parse_slice(server, d, volumes, idx, gfp_mask);
297 	case PNFS_BLOCK_VOLUME_CONCAT:
298 		return bl_parse_concat(server, d, volumes, idx, gfp_mask);
299 	case PNFS_BLOCK_VOLUME_STRIPE:
300 		return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
301 	default:
302 		dprintk("unsupported volume type: %d\n", volumes[idx].type);
303 		return -EIO;
304 	}
305 }
306 
307 struct nfs4_deviceid_node *
308 bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
309 		gfp_t gfp_mask)
310 {
311 	struct nfs4_deviceid_node *node = NULL;
312 	struct pnfs_block_volume *volumes;
313 	struct pnfs_block_dev *top;
314 	struct xdr_stream xdr;
315 	struct xdr_buf buf;
316 	struct page *scratch;
317 	int nr_volumes, ret, i;
318 	__be32 *p;
319 
320 	scratch = alloc_page(gfp_mask);
321 	if (!scratch)
322 		goto out;
323 
324 	xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
325 	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
326 
327 	p = xdr_inline_decode(&xdr, sizeof(__be32));
328 	if (!p)
329 		goto out_free_scratch;
330 	nr_volumes = be32_to_cpup(p++);
331 
332 	volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
333 			  gfp_mask);
334 	if (!volumes)
335 		goto out_free_scratch;
336 
337 	for (i = 0; i < nr_volumes; i++) {
338 		ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
339 		if (ret < 0)
340 			goto out_free_volumes;
341 	}
342 
343 	top = kzalloc(sizeof(*top), gfp_mask);
344 	if (!top)
345 		goto out_free_volumes;
346 
347 	ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
348 	if (ret) {
349 		bl_free_device(top);
350 		kfree(top);
351 		goto out_free_volumes;
352 	}
353 
354 	node = &top->node;
355 	nfs4_init_deviceid_node(node, server, &pdev->dev_id);
356 
357 out_free_volumes:
358 	kfree(volumes);
359 out_free_scratch:
360 	__free_page(scratch);
361 out:
362 	return node;
363 }
364