/* xref: /linux/fs/nfs/blocklayout/extent_tree.c (revision ccc1ead23c3311be76e87d1b06620f6cb697b42a) */
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2014-2016 Christoph Hellwig.
4  */
5 
6 #include <linux/vmalloc.h>
7 
8 #include "blocklayout.h"
9 #include "../nfs4trace.h"
10 
11 #define NFSDBG_FACILITY		NFSDBG_PNFS_LD
12 
/* Map an rb_node embedded in a pnfs_block_extent back to the extent. */
static inline struct pnfs_block_extent *
ext_node(struct rb_node *node)
{
	return rb_entry(node, struct pnfs_block_extent, be_node);
}
18 
/* Return the leftmost (lowest file offset) extent in @root, or NULL. */
static struct pnfs_block_extent *
ext_tree_first(struct rb_root *root)
{
	struct rb_node *node = rb_first(root);
	return node ? ext_node(node) : NULL;
}
25 
/* Return the extent preceding @be in file-offset order, or NULL. */
static struct pnfs_block_extent *
ext_tree_prev(struct pnfs_block_extent *be)
{
	struct rb_node *node = rb_prev(&be->be_node);
	return node ? ext_node(node) : NULL;
}
32 
/* Return the extent following @be in file-offset order, or NULL. */
static struct pnfs_block_extent *
ext_tree_next(struct pnfs_block_extent *be)
{
	struct rb_node *node = rb_next(&be->be_node);
	return node ? ext_node(node) : NULL;
}
39 
/* First sector past the end of @be in file-offset space. */
static inline sector_t
ext_f_end(struct pnfs_block_extent *be)
{
	return be->be_f_offset + be->be_length;
}
45 
46 static struct pnfs_block_extent *
__ext_tree_search(struct rb_root * root,sector_t start)47 __ext_tree_search(struct rb_root *root, sector_t start)
48 {
49 	struct rb_node *node = root->rb_node;
50 	struct pnfs_block_extent *be = NULL;
51 
52 	while (node) {
53 		be = ext_node(node);
54 		if (start < be->be_f_offset)
55 			node = node->rb_left;
56 		else if (start >= ext_f_end(be))
57 			node = node->rb_right;
58 		else
59 			return be;
60 	}
61 
62 	if (be) {
63 		if (start < be->be_f_offset)
64 			return be;
65 
66 		if (start >= ext_f_end(be))
67 			return ext_tree_next(be);
68 	}
69 
70 	return NULL;
71 }
72 
73 static bool
ext_can_merge(struct pnfs_block_extent * be1,struct pnfs_block_extent * be2)74 ext_can_merge(struct pnfs_block_extent *be1, struct pnfs_block_extent *be2)
75 {
76 	if (be1->be_state != be2->be_state)
77 		return false;
78 	if (be1->be_device != be2->be_device)
79 		return false;
80 
81 	if (be1->be_f_offset + be1->be_length != be2->be_f_offset)
82 		return false;
83 
84 	if (be1->be_state != PNFS_BLOCK_NONE_DATA &&
85 	    (be1->be_v_offset + be1->be_length != be2->be_v_offset))
86 		return false;
87 
88 	if (be1->be_state == PNFS_BLOCK_INVALID_DATA &&
89 	    be1->be_tag != be2->be_tag)
90 		return false;
91 
92 	return true;
93 }
94 
95 static struct pnfs_block_extent *
ext_try_to_merge_left(struct rb_root * root,struct pnfs_block_extent * be)96 ext_try_to_merge_left(struct rb_root *root, struct pnfs_block_extent *be)
97 {
98 	struct pnfs_block_extent *left = ext_tree_prev(be);
99 
100 	if (left && ext_can_merge(left, be)) {
101 		left->be_length += be->be_length;
102 		rb_erase(&be->be_node, root);
103 		nfs4_put_deviceid_node(be->be_device);
104 		kfree(be);
105 		return left;
106 	}
107 
108 	return be;
109 }
110 
111 static struct pnfs_block_extent *
ext_try_to_merge_right(struct rb_root * root,struct pnfs_block_extent * be)112 ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be)
113 {
114 	struct pnfs_block_extent *right = ext_tree_next(be);
115 
116 	if (right && ext_can_merge(be, right)) {
117 		be->be_length += right->be_length;
118 		rb_erase(&right->be_node, root);
119 		nfs4_put_deviceid_node(right->be_device);
120 		kfree(right);
121 	}
122 
123 	return be;
124 }
125 
__ext_put_deviceids(struct list_head * head)126 static void __ext_put_deviceids(struct list_head *head)
127 {
128 	struct pnfs_block_extent *be, *tmp;
129 
130 	list_for_each_entry_safe(be, tmp, head, be_list) {
131 		nfs4_put_deviceid_node(be->be_device);
132 		kfree(be);
133 	}
134 }
135 
/*
 * Insert @new into @root, which is kept sorted by file offset with no
 * overlapping extents.  When @merge_ok is set and an adjacent extent is
 * mergeable, that extent is simply extended and @new is freed.  The
 * caller must have resolved any overlap beforehand (see BUG() below).
 */
static void
__ext_tree_insert(struct rb_root *root,
		struct pnfs_block_extent *new, bool merge_ok)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;
	struct pnfs_block_extent *be;

	while (*p) {
		parent = *p;
		be = ext_node(parent);

		if (new->be_f_offset < be->be_f_offset) {
			if (merge_ok && ext_can_merge(new, be)) {
				/* Extend @be downwards to cover @new. */
				be->be_f_offset = new->be_f_offset;
				if (be->be_state != PNFS_BLOCK_NONE_DATA)
					be->be_v_offset = new->be_v_offset;
				be->be_length += new->be_length;
				be = ext_try_to_merge_left(root, be);
				goto free_new;
			}
			p = &(*p)->rb_left;
		} else if (new->be_f_offset >= ext_f_end(be)) {
			if (merge_ok && ext_can_merge(be, new)) {
				/* Extend @be upwards to cover @new. */
				be->be_length += new->be_length;
				be = ext_try_to_merge_right(root, be);
				goto free_new;
			}
			p = &(*p)->rb_right;
		} else {
			/* Overlap: the caller should have split @new. */
			BUG();
		}
	}

	rb_link_node(&new->be_node, parent, p);
	rb_insert_color(&new->be_node, root);
	return;
free_new:
	/* @new was merged into an existing extent; release it. */
	nfs4_put_deviceid_node(new->be_device);
	kfree(new);
}
176 
/*
 * Remove the range [@start, @end) from @root.  Extents fully inside the
 * range are unlinked and collected on @tmp so the caller can free them
 * after dropping bl_ext_lock; extents straddling one boundary are
 * trimmed, and an extent covering the whole range is split in two.
 *
 * Returns 0 on success or -ENOMEM if the split allocation fails.
 */
static int
__ext_tree_remove(struct rb_root *root,
		sector_t start, sector_t end, struct list_head *tmp)
{
	struct pnfs_block_extent *be;
	sector_t len1 = 0, len2 = 0;
	sector_t orig_v_offset;
	sector_t orig_len;

	be = __ext_tree_search(root, start);
	if (!be)
		return 0;
	if (be->be_f_offset >= end)
		return 0;

	orig_v_offset = be->be_v_offset;
	orig_len = be->be_length;

	/* len1/len2: how much of @be survives before/after the range. */
	if (start > be->be_f_offset)
		len1 = start - be->be_f_offset;
	if (ext_f_end(be) > end)
		len2 = ext_f_end(be) - end;

	if (len2 > 0) {
		if (len1 > 0) {
			/* Range is interior to @be: split into two pieces. */
			struct pnfs_block_extent *new;

			new = kzalloc(sizeof(*new), GFP_ATOMIC);
			if (!new)
				return -ENOMEM;

			be->be_length = len1;

			new->be_f_offset = end;
			if (be->be_state != PNFS_BLOCK_NONE_DATA) {
				new->be_v_offset =
					orig_v_offset + orig_len - len2;
			}
			new->be_length = len2;
			new->be_state = be->be_state;
			new->be_tag = be->be_tag;
			new->be_device = nfs4_get_deviceid(be->be_device);

			__ext_tree_insert(root, new, true);
		} else {
			/* Only the tail survives: move @be's start to @end. */
			be->be_f_offset = end;
			if (be->be_state != PNFS_BLOCK_NONE_DATA) {
				be->be_v_offset =
					orig_v_offset + orig_len - len2;
			}
			be->be_length = len2;
		}
	} else {
		if (len1 > 0) {
			/* Keep @be's head, continue with its successor. */
			be->be_length = len1;
			be = ext_tree_next(be);
		}

		/* Unlink every extent lying wholly inside the range. */
		while (be && ext_f_end(be) <= end) {
			struct pnfs_block_extent *next = ext_tree_next(be);

			rb_erase(&be->be_node, root);
			list_add_tail(&be->be_list, tmp);
			be = next;
		}

		/* Trim the head off an extent straddling @end. */
		if (be && be->be_f_offset < end) {
			len1 = ext_f_end(be) - end;
			be->be_f_offset = end;
			if (be->be_state != PNFS_BLOCK_NONE_DATA)
				be->be_v_offset += be->be_length - len1;
			be->be_length = len1;
		}
	}

	return 0;
}
254 
255 int
ext_tree_insert(struct pnfs_block_layout * bl,struct pnfs_block_extent * new)256 ext_tree_insert(struct pnfs_block_layout *bl, struct pnfs_block_extent *new)
257 {
258 	struct pnfs_block_extent *be;
259 	struct rb_root *root;
260 	int err = 0;
261 
262 	switch (new->be_state) {
263 	case PNFS_BLOCK_READWRITE_DATA:
264 	case PNFS_BLOCK_INVALID_DATA:
265 		root = &bl->bl_ext_rw;
266 		break;
267 	case PNFS_BLOCK_READ_DATA:
268 	case PNFS_BLOCK_NONE_DATA:
269 		root = &bl->bl_ext_ro;
270 		break;
271 	default:
272 		dprintk("invalid extent type\n");
273 		return -EINVAL;
274 	}
275 
276 	spin_lock(&bl->bl_ext_lock);
277 retry:
278 	be = __ext_tree_search(root, new->be_f_offset);
279 	if (!be || be->be_f_offset >= ext_f_end(new)) {
280 		__ext_tree_insert(root, new, true);
281 	} else if (new->be_f_offset >= be->be_f_offset) {
282 		if (ext_f_end(new) <= ext_f_end(be)) {
283 			nfs4_put_deviceid_node(new->be_device);
284 			kfree(new);
285 		} else {
286 			sector_t new_len = ext_f_end(new) - ext_f_end(be);
287 			sector_t diff = new->be_length - new_len;
288 
289 			new->be_f_offset += diff;
290 			new->be_v_offset += diff;
291 			new->be_length = new_len;
292 			goto retry;
293 		}
294 	} else if (ext_f_end(new) <= ext_f_end(be)) {
295 		new->be_length = be->be_f_offset - new->be_f_offset;
296 		__ext_tree_insert(root, new, true);
297 	} else {
298 		struct pnfs_block_extent *split;
299 		sector_t new_len = ext_f_end(new) - ext_f_end(be);
300 		sector_t diff = new->be_length - new_len;
301 
302 		split = kmemdup(new, sizeof(*new), GFP_ATOMIC);
303 		if (!split) {
304 			err = -EINVAL;
305 			goto out;
306 		}
307 
308 		split->be_length = be->be_f_offset - split->be_f_offset;
309 		split->be_device = nfs4_get_deviceid(new->be_device);
310 		__ext_tree_insert(root, split, true);
311 
312 		new->be_f_offset += diff;
313 		new->be_v_offset += diff;
314 		new->be_length = new_len;
315 		goto retry;
316 	}
317 out:
318 	spin_unlock(&bl->bl_ext_lock);
319 	return err;
320 }
321 
322 static bool
__ext_tree_lookup(struct rb_root * root,sector_t isect,struct pnfs_block_extent * ret)323 __ext_tree_lookup(struct rb_root *root, sector_t isect,
324 		struct pnfs_block_extent *ret)
325 {
326 	struct rb_node *node;
327 	struct pnfs_block_extent *be;
328 
329 	node = root->rb_node;
330 	while (node) {
331 		be = ext_node(node);
332 		if (isect < be->be_f_offset)
333 			node = node->rb_left;
334 		else if (isect >= ext_f_end(be))
335 			node = node->rb_right;
336 		else {
337 			*ret = *be;
338 			return true;
339 		}
340 	}
341 
342 	return false;
343 }
344 
345 bool
ext_tree_lookup(struct pnfs_block_layout * bl,sector_t isect,struct pnfs_block_extent * ret,bool rw)346 ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
347 	    struct pnfs_block_extent *ret, bool rw)
348 {
349 	bool found = false;
350 
351 	spin_lock(&bl->bl_ext_lock);
352 	if (!rw)
353 		found = __ext_tree_lookup(&bl->bl_ext_ro, isect, ret);
354 	if (!found)
355 		found = __ext_tree_lookup(&bl->bl_ext_rw, isect, ret);
356 	spin_unlock(&bl->bl_ext_lock);
357 
358 	return found;
359 }
360 
/*
 * Remove [@start, @end) from the ro tree and, if @rw is set, from the rw
 * tree as well.  The first error encountered wins.  Freed extents are
 * collected under the lock and released afterwards.
 */
int ext_tree_remove(struct pnfs_block_layout *bl, bool rw,
		sector_t start, sector_t end)
{
	LIST_HEAD(tmp);
	int ret, ret2;

	spin_lock(&bl->bl_ext_lock);
	ret = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
	if (rw) {
		ret2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
		if (!ret)
			ret = ret2;
	}
	spin_unlock(&bl->bl_ext_lock);

	/* Free the unlinked extents outside the spinlock. */
	__ext_put_deviceids(&tmp);
	return ret;
}
379 
/*
 * Split @be at sector @split: trim @be to end at @split and insert a new
 * extent covering [@split, old end) with the same state, tag and device.
 * Called with bl_ext_lock held; the insert deliberately skips merging.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int
ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be,
		sector_t split)
{
	struct pnfs_block_extent *new;
	sector_t orig_len = be->be_length;

	new = kzalloc(sizeof(*new), GFP_ATOMIC);
	if (!new)
		return -ENOMEM;

	be->be_length = split - be->be_f_offset;

	new->be_f_offset = split;
	/* Note: be->be_length was already trimmed above, so this is the
	 * volume offset corresponding to @split. */
	if (be->be_state != PNFS_BLOCK_NONE_DATA)
		new->be_v_offset = be->be_v_offset + be->be_length;
	new->be_length = orig_len - be->be_length;
	new->be_state = be->be_state;
	new->be_tag = be->be_tag;
	new->be_device = nfs4_get_deviceid(be->be_device);

	__ext_tree_insert(root, new, false);
	return 0;
}
404 
405 int
ext_tree_mark_written(struct pnfs_block_layout * bl,sector_t start,sector_t len,u64 lwb)406 ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
407 		sector_t len, u64 lwb)
408 {
409 	struct rb_root *root = &bl->bl_ext_rw;
410 	sector_t end = start + len;
411 	struct pnfs_block_extent *be;
412 	int err = 0;
413 	LIST_HEAD(tmp);
414 
415 	spin_lock(&bl->bl_ext_lock);
416 	/*
417 	 * First remove all COW extents or holes from written to range.
418 	 */
419 	err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
420 	if (err)
421 		goto out;
422 
423 	/*
424 	 * Then mark all invalid extents in the range as written to.
425 	 */
426 	for (be = __ext_tree_search(root, start); be; be = ext_tree_next(be)) {
427 		if (be->be_f_offset >= end)
428 			break;
429 
430 		if (be->be_state != PNFS_BLOCK_INVALID_DATA || be->be_tag)
431 			continue;
432 
433 		if (be->be_f_offset < start) {
434 			struct pnfs_block_extent *left = ext_tree_prev(be);
435 
436 			if (left && ext_can_merge(left, be)) {
437 				sector_t diff = start - be->be_f_offset;
438 
439 				left->be_length += diff;
440 
441 				be->be_f_offset += diff;
442 				be->be_v_offset += diff;
443 				be->be_length -= diff;
444 			} else {
445 				err = ext_tree_split(root, be, start);
446 				if (err)
447 					goto out;
448 			}
449 		}
450 
451 		if (ext_f_end(be) > end) {
452 			struct pnfs_block_extent *right = ext_tree_next(be);
453 
454 			if (right && ext_can_merge(be, right)) {
455 				sector_t diff = end - be->be_f_offset;
456 
457 				be->be_length -= diff;
458 
459 				right->be_f_offset -= diff;
460 				right->be_v_offset -= diff;
461 				right->be_length += diff;
462 			} else {
463 				err = ext_tree_split(root, be, end);
464 				if (err)
465 					goto out;
466 			}
467 		}
468 
469 		if (be->be_f_offset >= start && ext_f_end(be) <= end) {
470 			be->be_tag = EXTENT_WRITTEN;
471 			be = ext_try_to_merge_left(root, be);
472 			be = ext_try_to_merge_right(root, be);
473 		}
474 	}
475 out:
476 	if (bl->bl_lwb < lwb)
477 		bl->bl_lwb = lwb;
478 	spin_unlock(&bl->bl_ext_lock);
479 
480 	__ext_put_deviceids(&tmp);
481 	return err;
482 }
483 
/* Byte size of a layoutupdate payload carrying @count extents. */
static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count)
{
	size_t ext_size = bl->bl_scsi_layout ?
		PNFS_SCSI_RANGE_SIZE : PNFS_BLOCK_EXTENT_SIZE;

	/* Leading __be32 holds the extent count. */
	return sizeof(__be32) + ext_size * count;
}
491 
/*
 * Release the layoutupdate buffer set up by ext_tree_prepare_commit().
 * The single-page case holds one page reference; the vmalloc case holds
 * one reference per page plus the page-pointer array and the vmalloc
 * mapping itself.
 */
static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
		size_t buffer_size)
{
	if (arg->layoutupdate_pages != &arg->layoutupdate_page) {
		int nr_pages = DIV_ROUND_UP(buffer_size, PAGE_SIZE), i;

		for (i = 0; i < nr_pages; i++)
			put_page(arg->layoutupdate_pages[i]);
		vfree(arg->start_p);
		kfree(arg->layoutupdate_pages);
	} else {
		put_page(arg->layoutupdate_page);
	}
}
506 
/*
 * XDR-encode one block extent for LAYOUTCOMMIT: deviceid, file offset
 * and length in bytes, a zero storage offset, and READWRITE state.
 * Returns the advanced buffer position.
 */
static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p)
{
	p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
			NFS4_DEVICEID4_SIZE);
	p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
	p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
	p = xdr_encode_hyper(p, 0LL);
	*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
	return p;
}
517 
/* XDR-encode one SCSI layout range: byte offset and byte length. */
static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
{
	p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
	return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
}
523 
/**
 * ext_tree_try_encode_commit - try to encode all extents into the buffer
 * @bl: pointer to the layout
 * @p: pointer to the output buffer
 * @buffer_size: size of the output buffer
 * @count: output pointer to the number of encoded extents
 * @lastbyte: output pointer to the last written byte
 *
 * Return values:
 *   %0: Success, all required extents encoded, outputs are valid
 *   %-ENOSPC: Buffer too small, nothing encoded, outputs are invalid
 */
static int
ext_tree_try_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
		size_t buffer_size, size_t *count, __u64 *lastbyte)
{
	struct pnfs_block_extent *be;

	spin_lock(&bl->bl_ext_lock);
	/* First pass: count eligible extents; bail before touching state. */
	for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
		if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
		    be->be_tag != EXTENT_WRITTEN)
			continue;

		(*count)++;
		if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
			spin_unlock(&bl->bl_ext_lock);
			return -ENOSPC;
		}
	}
	/* Second pass: everything fits, encode and mark as committing. */
	for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
		if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
		    be->be_tag != EXTENT_WRITTEN)
			continue;

		if (bl->bl_scsi_layout)
			p = encode_scsi_range(be, p);
		else
			p = encode_block_extent(be, p);
		be->be_tag = EXTENT_COMMITTING;
	}
	/* Consume the last-written-byte watermark; 0 means "unknown". */
	*lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX;
	bl->bl_lwb = 0;
	spin_unlock(&bl->bl_ext_lock);

	return 0;
}
571 
/**
 * ext_tree_encode_commit - encode as much as possible extents into the buffer
 * @bl: pointer to the layout
 * @p: pointer to the output buffer
 * @buffer_size: size of the output buffer
 * @count: output pointer to the number of encoded extents
 * @lastbyte: output pointer to the last written byte
 *
 * Return values:
 *   %0: Success, all required extents encoded, outputs are valid
 *   %-ENOSPC: Buffer too small, some extents are encoded, outputs are valid
 */
static int
ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
		size_t buffer_size, size_t *count, __u64 *lastbyte)
{
	struct pnfs_block_extent *be, *be_prev;
	int ret = 0;

	spin_lock(&bl->bl_ext_lock);
	for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
		if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
		    be->be_tag != EXTENT_WRITTEN)
			continue;

		(*count)++;
		if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
			/* Out of room: report only what fit. */
			(*count)--;
			ret = -ENOSPC;
			break;
		}

		if (bl->bl_scsi_layout)
			p = encode_scsi_range(be, p);
		else
			p = encode_block_extent(be, p);
		be->be_tag = EXTENT_COMMITTING;
		be_prev = be;
	}
	if (!ret) {
		*lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX;
		bl->bl_lwb = 0;
	} else {
		/*
		 * Partial encode: report up to the end of the last extent
		 * that fit.
		 *
		 * NOTE(review): if the very first eligible extent already
		 * overflows the buffer, be_prev is read uninitialized here.
		 * The caller passes a wsize-sized buffer, which presumably
		 * always fits at least one extent -- confirm.
		 */
		*lastbyte = be_prev->be_f_offset + be_prev->be_length;
		*lastbyte <<= SECTOR_SHIFT;
		*lastbyte -= 1;
	}
	spin_unlock(&bl->bl_ext_lock);

	return ret;
}
623 
/**
 * ext_tree_prepare_commit - encode extents that need to be committed
 * @arg: layout commit data
 *
 * Return values:
 *   %0: Success, all required extents are encoded
 *   %-ENOSPC: Some extents are encoded, but not all, due to RPC size limit
 *   %-ENOMEM: Out of memory, extents not encoded
 */
int
ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
{
	struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout);
	size_t count = 0, buffer_size = PAGE_SIZE;
	__be32 *start_p;
	int ret;

	/* Optimistically try a single page first. */
	arg->layoutupdate_page = alloc_page(GFP_NOFS);
	if (!arg->layoutupdate_page)
		return -ENOMEM;
	start_p = page_address(arg->layoutupdate_page);
	arg->layoutupdate_pages = &arg->layoutupdate_page;

	/* start_p + 1: slot 0 holds the extent count, written below. */
	ret = ext_tree_try_encode_commit(bl, start_p + 1, buffer_size,
			&count, &arg->lastbytewritten);
	if (unlikely(ret)) {
		/* Didn't fit in one page: retry with a wsize-sized buffer. */
		ext_tree_free_commitdata(arg, buffer_size);

		buffer_size = NFS_SERVER(arg->inode)->wsize;
		count = 0;

		arg->layoutupdate_pages =
			kcalloc(DIV_ROUND_UP(buffer_size, PAGE_SIZE),
				sizeof(struct page *), GFP_NOFS);
		if (!arg->layoutupdate_pages)
			return -ENOMEM;

		start_p = __vmalloc(buffer_size, GFP_NOFS);
		if (!start_p) {
			kfree(arg->layoutupdate_pages);
			return -ENOMEM;
		}

		ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size,
				&count, &arg->lastbytewritten);
	}

	*start_p = cpu_to_be32(count);
	arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count);

	if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
		/* Pin each vmalloc page so the RPC layer may reference it. */
		void *p = start_p, *end = p + arg->layoutupdate_len;
		struct page *page = NULL;
		int i = 0;

		arg->start_p = start_p;
		for ( ; p < end; p += PAGE_SIZE) {
			page = vmalloc_to_page(p);
			arg->layoutupdate_pages[i++] = page;
			get_page(page);
		}
	}

	trace_bl_ext_tree_prepare_commit(ret, count,
			arg->lastbytewritten, !!ret);
	return ret;
}
691 
/*
 * LAYOUTCOMMIT completion: on success flip all COMMITTING extents to
 * READWRITE_DATA; on failure revert them to WRITTEN so the next
 * layoutcommit retries them.  Also frees the encoded commit buffer.
 */
void
ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status)
{
	struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout);
	struct rb_root *root = &bl->bl_ext_rw;
	struct pnfs_block_extent *be;

	dprintk("%s status %d\n", __func__, status);

	ext_tree_free_commitdata(arg, arg->layoutupdate_len);

	spin_lock(&bl->bl_ext_lock);
	for (be = ext_tree_first(root); be; be = ext_tree_next(be)) {
		if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
		    be->be_tag != EXTENT_COMMITTING)
			continue;

		if (status) {
			/*
			 * Mark as written and try again.
			 *
			 * XXX: some real error handling here wouldn't hurt..
			 */
			be->be_tag = EXTENT_WRITTEN;
		} else {
			be->be_state = PNFS_BLOCK_READWRITE_DATA;
			be->be_tag = 0;
		}

		/* State/tag changed, so new merges may be possible. */
		be = ext_try_to_merge_left(root, be);
		be = ext_try_to_merge_right(root, be);
	}
	spin_unlock(&bl->bl_ext_lock);
}
726