xref: /linux/fs/nfs/blocklayout/dev.c (revision c0c914eca7f251c70facc37dfebeaf176601918d)
/*
 * Copyright (c) 2014 Christoph Hellwig.
 */
#include <linux/sunrpc/svc.h>
#include <linux/blkdev.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>

#include "blocklayout.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

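/*
 * Release a pnfs_block_dev tree: interior nodes free their children
 * recursively, leaf nodes drop the block device reference taken in
 * bl_parse_simple().
 */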
static void
bl_free_device(struct pnfs_block_dev *dev)
{
	if (dev->nr_children) {
		int i;

		for (i = 0; i < dev->nr_children; i++)
			bl_free_device(&dev->children[i]);
		kfree(dev->children);
	} else {
		if (dev->bdev)
			blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
	}
}

void
bl_free_deviceid_node(struct nfs4_deviceid_node *d)
{
	struct pnfs_block_dev *dev =
		container_of(d, struct pnfs_block_dev, node);

	bl_free_device(dev);
	kfree_rcu(dev, node.rcu);
}

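/*
 * Decode a single volume from the GETDEVICEINFO XDR stream.  The wire
 * format follows the pNFS block layout volume hierarchy (RFC 5663):
 * a volume type, followed by type-specific data -- device signatures
 * for SIMPLE volumes, a start/length/volume triple for SLICE volumes,
 * and a list of component volume indices for CONCAT and STRIPE volumes
 * (plus a chunk size for STRIPE).
 */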
static int
nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
{
	__be32 *p;
	int i;

	p = xdr_inline_decode(xdr, 4);
	if (!p)
		return -EIO;
	b->type = be32_to_cpup(p++);

	switch (b->type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		p = xdr_inline_decode(xdr, 4);
		if (!p)
			return -EIO;
		b->simple.nr_sigs = be32_to_cpup(p++);
		if (!b->simple.nr_sigs) {
			dprintk("no signature\n");
			return -EIO;
		}
		/* don't overflow the fixed-size sigs[] array */
		if (b->simple.nr_sigs > ARRAY_SIZE(b->simple.sigs)) {
			dprintk("too many signatures: %d\n",
				b->simple.nr_sigs);
			return -EIO;
		}

		b->simple.len = 4 + 4;
		for (i = 0; i < b->simple.nr_sigs; i++) {
			p = xdr_inline_decode(xdr, 8 + 4);
			if (!p)
				return -EIO;
			p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
			b->simple.sigs[i].sig_len = be32_to_cpup(p++);
			if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
				pr_info("signature too long: %d\n",
					b->simple.sigs[i].sig_len);
				return -EIO;
			}

			p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
			if (!p)
				return -EIO;
			memcpy(&b->simple.sigs[i].sig, p,
				b->simple.sigs[i].sig_len);

			b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
		}
		break;
	case PNFS_BLOCK_VOLUME_SLICE:
		p = xdr_inline_decode(xdr, 8 + 8 + 4);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->slice.start);
		p = xdr_decode_hyper(p, &b->slice.len);
		b->slice.volume = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_CONCAT:
		p = xdr_inline_decode(xdr, 4);
		if (!p)
			return -EIO;
		b->concat.volumes_count = be32_to_cpup(p++);
		/* don't overflow the fixed-size volumes[] array */
		if (b->concat.volumes_count > ARRAY_SIZE(b->concat.volumes)) {
			dprintk("too many volumes: %d\n",
				b->concat.volumes_count);
			return -EIO;
		}

		p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
		if (!p)
			return -EIO;
		for (i = 0; i < b->concat.volumes_count; i++)
			b->concat.volumes[i] = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_STRIPE:
		p = xdr_inline_decode(xdr, 8 + 4);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->stripe.chunk_size);
		b->stripe.volumes_count = be32_to_cpup(p++);
		/* don't overflow the fixed-size volumes[] array */
		if (b->stripe.volumes_count > ARRAY_SIZE(b->stripe.volumes)) {
			dprintk("too many volumes: %d\n",
				b->stripe.volumes_count);
			return -EIO;
		}

		p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
		if (!p)
			return -EIO;
		for (i = 0; i < b->stripe.volumes_count; i++)
			b->stripe.volumes[i] = be32_to_cpup(p++);
		break;
	default:
		dprintk("unknown volume type!\n");
		return -EIO;
	}

	return 0;
}

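/*
 * The ->map() callbacks below translate an offset in the logical volume
 * into the underlying block device, the on-disk offset and the extent
 * (start/len) of the logical range that the mapping covers.  A simple
 * volume maps one to one onto its block device.
 */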
static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	map->start = dev->start;
	map->len = dev->len;
	map->disk_offset = dev->disk_offset;
	map->bdev = dev->bdev;
	return true;
}

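/*
 * For a concatenation, find the child whose [start, start + len) range
 * contains the offset and let it map the child-relative offset.
 */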
static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	int i;

	for (i = 0; i < dev->nr_children; i++) {
		struct pnfs_block_dev *child = &dev->children[i];

		if (child->start > offset ||
		    child->start + child->len <= offset)
			continue;

		child->map(child, offset - child->start, map);
		return true;
	}

	dprintk("%s: ran off loop!\n", __func__);
	return false;
}

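/*
 * For a stripe, chunk_size is the stripe unit and nr_children the stripe
 * width: the chunk index modulo nr_children selects the child, the offset
 * is truncated to the start of the chunk, and the chunk's disk offset is
 * taken as that truncated offset divided by nr_children.  For example,
 * with a 64k chunk size and 3 children, offset 200k falls into chunk 3,
 * which maps to child 3 % 3 = 0 at disk offset 192k / 3 = 64k, and the
 * mapping is valid for one chunk.
 */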
static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	struct pnfs_block_dev *child;
	u64 chunk;
	u32 chunk_idx;
	u64 disk_offset;

	chunk = div_u64(offset, dev->chunk_size);
	div_u64_rem(chunk, dev->nr_children, &chunk_idx);

	if (chunk_idx >= dev->nr_children) {
		dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
			__func__, chunk_idx, offset, dev->chunk_size);
		/* error, should not happen */
		return false;
	}

	/* truncate offset to the beginning of the stripe */
	offset = chunk * dev->chunk_size;

	/* disk offset of the stripe */
	disk_offset = div_u64(offset, dev->nr_children);

	child = &dev->children[chunk_idx];
	child->map(child, disk_offset, map);

	map->start += offset;
	map->disk_offset += disk_offset;
	map->len = dev->chunk_size;
	return true;
}

static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);

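/*
 * The bl_parse_* helpers build the in-memory pnfs_block_dev tree bottom-up
 * from the decoded volume array: SLICE, CONCAT and STRIPE volumes recurse
 * into their component volumes via bl_parse_deviceid(), while SIMPLE
 * volumes resolve the device signature to a local dev_t (typically through
 * the blkmapd upcall) and open the underlying block device.
 */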
static int
bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	dev_t dev;

	dev = bl_resolve_deviceid(server, v, gfp_mask);
	if (!dev)
		return -EIO;

	d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(d->bdev)) {
		printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
			MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
		return PTR_ERR(d->bdev);
	}

	d->len = i_size_read(d->bdev->bd_inode);
	d->map = bl_map_simple;

	printk(KERN_INFO "pNFS: using block device %s\n",
		d->bdev->bd_disk->disk_name);
	return 0;
}

static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	int ret;

	ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
	if (ret)
		return ret;

	d->disk_offset = v->slice.start;
	d->len = v->slice.len;
	return 0;
}

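/*
 * A concatenation appends its children back to back: each child's start is
 * shifted by the combined length of the children before it, and the total
 * length is the sum of the child lengths.
 */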
static int
bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	u64 len = 0;
	int ret, i;

	d->children = kcalloc(v->concat.volumes_count,
			sizeof(struct pnfs_block_dev), gfp_mask);
	if (!d->children)
		return -ENOMEM;

	for (i = 0; i < v->concat.volumes_count; i++) {
		ret = bl_parse_deviceid(server, &d->children[i],
				volumes, v->concat.volumes[i], gfp_mask);
		if (ret)
			return ret;

		d->nr_children++;
		d->children[i].start += len;
		len += d->children[i].len;
	}

	d->len = len;
	d->map = bl_map_concat;
	return 0;
}

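/*
 * A stripe interleaves its children in chunk_size units; the total length
 * is the sum of the child lengths and the per-I/O mapping is done in
 * bl_map_stripe().
 */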
static int
bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	u64 len = 0;
	int ret, i;

	d->children = kcalloc(v->stripe.volumes_count,
			sizeof(struct pnfs_block_dev), gfp_mask);
	if (!d->children)
		return -ENOMEM;

	for (i = 0; i < v->stripe.volumes_count; i++) {
		ret = bl_parse_deviceid(server, &d->children[i],
				volumes, v->stripe.volumes[i], gfp_mask);
		if (ret)
			return ret;

		d->nr_children++;
		len += d->children[i].len;
	}

	d->len = len;
	d->chunk_size = v->stripe.chunk_size;
	d->map = bl_map_stripe;
	return 0;
}

static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	switch (volumes[idx].type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		return bl_parse_simple(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_SLICE:
		return bl_parse_slice(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_CONCAT:
		return bl_parse_concat(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_STRIPE:
		return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
	default:
		dprintk("unsupported volume type: %d\n", volumes[idx].type);
		return -EIO;
	}
}

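/*
 * Decode a GETDEVICEINFO reply into a deviceid node.  The reply contains a
 * counted array of volumes; the last volume in the array is the root of the
 * volume tree, which bl_parse_deviceid() turns into the pnfs_block_dev
 * hierarchy rooted at 'top'.  On any failure NULL is returned.
 */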
struct nfs4_deviceid_node *
bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
		gfp_t gfp_mask)
{
	struct nfs4_deviceid_node *node = NULL;
	struct pnfs_block_volume *volumes;
	struct pnfs_block_dev *top;
	struct xdr_stream xdr;
	struct xdr_buf buf;
	struct page *scratch;
	int nr_volumes, ret, i;
	__be32 *p;

	scratch = alloc_page(gfp_mask);
	if (!scratch)
		goto out;

	xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);

	p = xdr_inline_decode(&xdr, sizeof(__be32));
	if (!p)
		goto out_free_scratch;
	nr_volumes = be32_to_cpup(p++);

	volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
			  gfp_mask);
	if (!volumes)
		goto out_free_scratch;

	for (i = 0; i < nr_volumes; i++) {
		ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
		if (ret < 0)
			goto out_free_volumes;
	}

	top = kzalloc(sizeof(*top), gfp_mask);
	if (!top)
		goto out_free_volumes;

	ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
	if (ret) {
		bl_free_device(top);
		kfree(top);
		goto out_free_volumes;
	}

	node = &top->node;
	nfs4_init_deviceid_node(node, server, &pdev->dev_id);

out_free_volumes:
	kfree(volumes);
out_free_scratch:
	__free_page(scratch);
out:
	return node;
}
369