xref: /linux/fs/ocfs2/extent_map.c (revision a5766f11cfd3a0c03450d99c8fe548c2940be884)
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * extent_map.c
 *
 * Block/Cluster mapping functions
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License, version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/fiemap.h>

#define MLOG_MASK_PREFIX ML_EXTENT_MAP
#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "super.h"

#include "buffer_head_io.h"

/*
 * The extent caching implementation is intentionally trivial.
 *
 * We only cache a small number of extents stored directly on the
 * inode, so linear-time operations over the list are acceptable. If
 * we ever want to increase the size of the extent map, then these
 * algorithms must get smarter.
 */

void ocfs2_extent_map_init(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	oi->ip_extent_map.em_num_items = 0;
	INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
}

static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
				      unsigned int cpos,
				      struct ocfs2_extent_map_item **ret_emi)
{
	unsigned int range;
	struct ocfs2_extent_map_item *emi;

	*ret_emi = NULL;

	list_for_each_entry(emi, &em->em_list, ei_list) {
		range = emi->ei_cpos + emi->ei_clusters;

		if (cpos >= emi->ei_cpos && cpos < range) {
			list_move(&emi->ei_list, &em->em_list);

			*ret_emi = emi;
			break;
		}
	}
}
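
/*
 * Worked example of the move-to-front behavior above (hypothetical
 * values): with cached items A [cpos 0, 8 clusters] and
 * B [cpos 8, 4 clusters] on the list in that order, a lookup at
 * cpos 10 matches B (8 <= 10 < 12) and list_move() promotes B to the
 * head of em_list, so the list becomes B, A. Frequently hit extents
 * therefore stay near the front of the linear scan.
 */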

static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
				   unsigned int *phys, unsigned int *len,
				   unsigned int *flags)
{
	unsigned int coff;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map_item *emi;

	spin_lock(&oi->ip_lock);

	__ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
	if (emi) {
		coff = cpos - emi->ei_cpos;
		*phys = emi->ei_phys + coff;
		if (len)
			*len = emi->ei_clusters - coff;
		if (flags)
			*flags = emi->ei_flags;
	}

	spin_unlock(&oi->ip_lock);

	if (emi == NULL)
		return -ENOENT;

	return 0;
}

/*
 * Forget about all clusters equal to or greater than cpos.
 */
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
	struct ocfs2_extent_map_item *emi, *n;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	LIST_HEAD(tmp_list);
	unsigned int range;

	spin_lock(&oi->ip_lock);
	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
		if (emi->ei_cpos >= cpos) {
			/* Full truncate of this record. */
			list_move(&emi->ei_list, &tmp_list);
			BUG_ON(em->em_num_items == 0);
			em->em_num_items--;
			continue;
		}

		range = emi->ei_cpos + emi->ei_clusters;
		if (range > cpos) {
			/* Partial truncate */
			emi->ei_clusters = cpos - emi->ei_cpos;
		}
	}
	spin_unlock(&oi->ip_lock);

	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
		list_del(&emi->ei_list);
		kfree(emi);
	}
}
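
/*
 * Worked example (hypothetical values): with cached items
 * [cpos 0, 8 clusters] and [cpos 10, 4 clusters], a truncate at
 * cpos 4 trims the first item to 4 clusters (the partial case) and
 * frees the second entirely, dropping em_num_items to 1.
 */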

/*
 * Is any part of emi2 contained within emi1?
 */
static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
				 struct ocfs2_extent_map_item *emi2)
{
	unsigned int range1, range2;

	/*
	 * Check if logical start of emi2 is inside emi1
	 */
	range1 = emi1->ei_cpos + emi1->ei_clusters;
	if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
		return 1;

	/*
	 * Check if logical end of emi2 is inside emi1
	 */
	range2 = emi2->ei_cpos + emi2->ei_clusters;
	if (range2 > emi1->ei_cpos && range2 <= range1)
		return 1;

	return 0;
}
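
/*
 * Worked example for ocfs2_ei_is_contained() (hypothetical values):
 * emi1 [cpos 10, 5 clusters] covers the range [10, 15). An emi2 of
 * [cpos 14, 4 clusters] overlaps because its start (14) lies inside
 * [10, 15); an emi2 of [cpos 6, 5 clusters] overlaps because its end
 * (11) lies inside that range. An emi2 of [cpos 15, 2 clusters]
 * merely touches and returns 0. Note that an emi2 strictly containing
 * emi1 is only detected by the reversed call, which is why
 * ocfs2_try_to_merge_extent_map() below checks both directions.
 */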

static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
				  struct ocfs2_extent_map_item *src)
{
	dest->ei_cpos = src->ei_cpos;
	dest->ei_phys = src->ei_phys;
	dest->ei_clusters = src->ei_clusters;
	dest->ei_flags = src->ei_flags;
}

/*
 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 * otherwise.
 */
static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
					 struct ocfs2_extent_map_item *ins)
{
	/*
	 * Handle contiguousness
	 */
	if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
	    ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
	    ins->ei_flags == emi->ei_flags) {
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
		   ins->ei_flags == emi->ei_flags) {
		emi->ei_phys = ins->ei_phys;
		emi->ei_cpos = ins->ei_cpos;
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	}

	/*
	 * Overlapping extents - this shouldn't happen unless we've
	 * split an extent to change its flags. That is exceedingly
	 * rare, so there's no sense in trying to optimize it yet.
	 */
	if (ocfs2_ei_is_contained(emi, ins) ||
	    ocfs2_ei_is_contained(ins, emi)) {
		ocfs2_copy_emi_fields(emi, ins);
		return 1;
	}

	/* No merge was possible. */
	return 0;
}
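
/*
 * Worked example of the contiguousness checks above (hypothetical
 * values): with emi [cpos 0, phys 100, 4 clusters] and ins [cpos 4,
 * phys 104, 2 clusters], the first branch fires and emi simply grows
 * to 6 clusters. Conversely, with emi [cpos 4, phys 104, 2 clusters]
 * and ins [cpos 0, phys 100, 4 clusters], the second branch rewrites
 * emi to [cpos 0, phys 100, 6 clusters]. Equal flags are required in
 * both branches so that, for example, an unwritten extent is never
 * merged with a written one.
 */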

/*
 * In order to reduce complexity on the caller, this insert function
 * is intentionally liberal in what it will accept.
 *
 * The only rule is that the truncate call *must* be used whenever
 * records have been deleted. This avoids inserting overlapping
 * records with different physical mappings.
 */
void ocfs2_extent_map_insert_rec(struct inode *inode,
				 struct ocfs2_extent_rec *rec)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
	struct ocfs2_extent_map_item ins;

	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
					       le64_to_cpu(rec->e_blkno));
	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
	ins.ei_flags = rec->e_flags;

search:
	spin_lock(&oi->ip_lock);

	list_for_each_entry(emi, &em->em_list, ei_list) {
		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
			list_move(&emi->ei_list, &em->em_list);
			spin_unlock(&oi->ip_lock);
			goto out;
		}
	}

	/*
	 * No item could be merged.
	 *
	 * Either allocate and add a new item, or overwrite the least
	 * recently inserted.
	 */

	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
		if (new_emi == NULL) {
			spin_unlock(&oi->ip_lock);

			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
			if (new_emi == NULL)
				goto out;

			goto search;
		}

		ocfs2_copy_emi_fields(new_emi, &ins);
		list_add(&new_emi->ei_list, &em->em_list);
		em->em_num_items++;
		new_emi = NULL;
	} else {
		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
		emi = list_entry(em->em_list.prev,
				 struct ocfs2_extent_map_item, ei_list);
		list_move(&emi->ei_list, &em->em_list);
		ocfs2_copy_emi_fields(emi, &ins);
	}

	spin_unlock(&oi->ip_lock);

out:
	if (new_emi)
		kfree(new_emi);
}
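
/*
 * A minimal, hypothetical usage sketch for the insert path above (not
 * built; the block and cluster values are invented for illustration):
 */
#if 0
static void example_prime_extent_map(struct inode *inode)
{
	/* Leaf record: logical cluster 0, block 800, 4 clusters long. */
	struct ocfs2_extent_rec rec = {
		.e_cpos = cpu_to_le32(0),
		.e_blkno = cpu_to_le64(800),
		.e_leaf_clusters = cpu_to_le16(4),
	};

	/*
	 * May allocate with GFP_NOFS; if the allocation fails the
	 * record is silently not cached, which is safe because the
	 * extent map is only ever a cache.
	 */
	ocfs2_extent_map_insert_rec(inode, &rec);
}
#endif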

static int ocfs2_last_eb_is_empty(struct inode *inode,
				  struct ocfs2_dinode *di)
{
	int ret, next_free;
	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk,
			       &eb_bh, OCFS2_BH_CACHED, inode);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
	el = &eb->h_list;

	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
		ret = -EROFS;
		OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
		goto out;
	}

	if (el->l_tree_depth) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has non-zero tree depth in "
			    "leaf block %llu\n", inode->i_ino,
			    (unsigned long long)eb_bh->b_blocknr);
		ret = -EROFS;
		goto out;
	}

	next_free = le16_to_cpu(el->l_next_free_rec);

	if (next_free == 0 ||
	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
		ret = 1;

out:
	brelse(eb_bh);
	return ret;
}

/*
 * Return the first index within el which contains an extent start
 * larger than v_cluster.
 */
static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
				       u32 v_cluster)
{
	int i;
	struct ocfs2_extent_rec *rec;

	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		rec = &el->l_recs[i];

		if (v_cluster < le32_to_cpu(rec->e_cpos))
			break;
	}

	return i;
}
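
/*
 * Worked example (hypothetical values): with l_recs[0] starting at
 * e_cpos 0 for 4 clusters and l_recs[1] starting at e_cpos 10, a
 * v_cluster of 5 returns index 1 (the first record starting beyond
 * it), while a v_cluster of 20 walks off the end and returns
 * l_next_free_rec.
 */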

/*
 * Figure out the size of a hole which starts at v_cluster within the given
 * extent list.
 *
 * If there is no more allocation past v_cluster, we return the maximum
 * possible cluster offset (UINT_MAX) minus v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 */
static int ocfs2_figure_hole_clusters(struct inode *inode,
				      struct ocfs2_extent_list *el,
				      struct buffer_head *eb_bh,
				      u32 v_cluster,
				      u32 *num_clusters)
{
	int ret, i;
	struct buffer_head *next_eb_bh = NULL;
	struct ocfs2_extent_block *eb, *next_eb;

	i = ocfs2_search_for_hole_index(el, v_cluster);

	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;

		/*
		 * Check the next leaf for any extents.
		 */

		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
			goto no_more_extents;

		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				       le64_to_cpu(eb->h_next_leaf_blk),
				       &next_eb_bh, OCFS2_BH_CACHED, inode);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;

		if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) {
			ret = -EROFS;
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb);
			goto out;
		}

		el = &next_eb->h_list;

		i = ocfs2_search_for_hole_index(el, v_cluster);
	}

no_more_extents:
	if (i == le16_to_cpu(el->l_next_free_rec)) {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT_MAX - v_cluster;
	} else {
		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
	}

	ret = 0;
out:
	brelse(next_eb_bh);
	return ret;
}
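
/*
 * Worked example (hypothetical values): continuing the case above
 * where l_recs[1] starts at e_cpos 10, a hole beginning at
 * v_cluster 5 yields *num_clusters = 10 - 5 = 5. If nothing in this
 * leaf or the next one starts past v_cluster, the hole is reported
 * as UINT_MAX - v_cluster, i.e. all the clusters that could still be
 * allocated there.
 */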

static int ocfs2_get_clusters_nocache(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 v_cluster, unsigned int *hole_len,
				      struct ocfs2_extent_rec *ret_rec,
				      unsigned int *is_last)
{
	int i, ret, tree_height, len;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *uninitialized_var(eb);
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct buffer_head *eb_bh = NULL;

	memset(ret_rec, 0, sizeof(*ret_rec));
	if (is_last)
		*is_last = 0;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	el = &di->id2.i_list;
	tree_height = le16_to_cpu(el->l_tree_depth);

	if (tree_height > 0) {
		ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non-zero tree depth in "
				    "leaf block %llu\n", inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		/*
		 * Holes can be larger than the maximum size of an
		 * extent, so we return their lengths in a separate
		 * field.
		 */
		if (hole_len) {
			ret = ocfs2_figure_hole_clusters(inode, el, eb_bh,
							 v_cluster, &len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			*hole_len = len;
		}
		goto out_hole;
	}

	rec = &el->l_recs[i];

	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
			    "record (%u, %u, 0)", inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*ret_rec = *rec;

	/*
	 * Checking for the last extent is potentially expensive - we
	 * might have to look at the next leaf over to see if it's
	 * empty.
	 *
	 * The first two checks are to see whether the caller even
	 * cares about this information, and whether the extent is the
	 * last one in its list.
	 *
	 * If those hold true, then the extent is last if any of the
	 * additional conditions hold true:
	 *  - Extent list is in-inode
	 *  - Extent list is right-most
	 *  - Extent list is 2nd to rightmost, with empty right-most
	 */
	if (is_last) {
		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
			if (tree_height == 0)
				*is_last = 1;
			else if (eb->h_blkno == di->i_last_eb_blk)
				*is_last = 1;
			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
				ret = ocfs2_last_eb_is_empty(inode, di);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}
				if (ret == 1)
					*is_last = 1;
			}
		}
	}

out_hole:
	ret = 0;
out:
	brelse(eb_bh);
	return ret;
}

static void ocfs2_relative_extent_offsets(struct super_block *sb,
					  u32 v_cluster,
					  struct ocfs2_extent_rec *rec,
					  u32 *p_cluster, u32 *num_clusters)
{
	u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);

	*p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
	*p_cluster = *p_cluster + coff;

	if (num_clusters)
		*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
}
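
/*
 * Worked example (hypothetical values, 8 blocks per cluster): a rec
 * with e_cpos 8, e_blkno 800 (i.e. physical cluster 100) and
 * e_leaf_clusters 4, queried at v_cluster 10, gives coff = 2, so
 * *p_cluster = 102 and *num_clusters = 2 - the clusters remaining in
 * the extent from the queried offset onward.
 */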

int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
		       u32 *p_cluster, u32 *num_clusters,
		       unsigned int *extent_flags)
{
	int ret;
	unsigned int uninitialized_var(hole_len), flags = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = -ERANGE;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
				      num_clusters, extent_flags);
	if (ret == 0)
		goto out;

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
			       &di_bh, OCFS2_BH_CACHED, inode);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
					 &rec, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (rec.e_blkno == 0ULL) {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters) {
			*num_clusters = hole_len;
		}
	} else {
		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
					      p_cluster, num_clusters);
		flags = rec.e_flags;

		ocfs2_extent_map_insert_rec(inode, &rec);
	}

	if (extent_flags)
		*extent_flags = flags;

out:
	brelse(di_bh);
	return ret;
}

/*
 * This expects alloc_sem to be held. The allocation cannot change at
 * all while the map is in the process of being updated.
 */
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
				u64 *ret_count, unsigned int *extent_flags)
{
	int ret;
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 cpos, num_clusters, p_cluster;
	u64 boff = 0;

	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);

	ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
				 extent_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * p_cluster == 0 indicates a hole.
	 */
	if (p_cluster) {
		boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		boff += (v_blkno & (u64)(bpc - 1));
	}

	*p_blkno = boff;

	if (ret_count) {
		*ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		*ret_count -= v_blkno & (u64)(bpc - 1);
	}

out:
	return ret;
}
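
/*
 * Worked example (hypothetical values, 4KB blocks and 32KB clusters,
 * so bpc = 8): for v_blkno 20, cpos = 20 / 8 = 2. If cluster 2 maps
 * to p_cluster 102 with num_clusters 2, then boff = 102 * 8 +
 * (20 & 7) = 820, and *ret_count = 2 * 8 - 4 = 12 contiguous blocks
 * remain mapped from that point.
 */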

static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
			       struct fiemap_extent_info *fieinfo,
			       u64 map_start)
{
	int ret;
	unsigned int id_count;
	struct ocfs2_dinode *di;
	u64 phys;
	u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	di = (struct ocfs2_dinode *)di_bh->b_data;
	id_count = le16_to_cpu(di->id2.i_data.id_count);

	if (map_start < id_count) {
		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
		phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data);

		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
					      flags);
		if (ret < 0)
			return ret;
	}

	return 0;
}
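
/*
 * Worked example (hypothetical values): with 4KB blocks and the inode
 * at block 10, phys = 10 << 12 = 40960 plus the byte offset of
 * id2.i_data.id_data within the dinode, so the single reported extent
 * points at the inline data bytes inside the inode block itself.
 */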

#define OCFS2_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC)

int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 map_start, u64 map_len)
{
	int ret, is_last;
	u32 mapping_end, cpos;
	unsigned int hole_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 len_bytes, phys_bytes, virt_bytes;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
	if (ret)
		return ret;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * Handle inline-data separately.
	 */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
		goto out_unlock;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		u32 fe_flags;

		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 &hole_size, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		if (rec.e_blkno == 0ULL) {
			cpos += hole_size;
			continue;
		}

		fe_flags = 0;
		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (is_last)
			fe_flags |= FIEMAP_EXTENT_LAST;
		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
					      len_bytes, fe_flags);
		if (ret)
			break;

		cpos = le32_to_cpu(rec.e_cpos) + le16_to_cpu(rec.e_leaf_clusters);
	}

	if (ret > 0)
		ret = 0;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}