xref: /linux/fs/ocfs2/extent_map.c (revision 5a0e3ad6af8660be21ca98a971cd00f331318c05)
1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*-
2ccd979bdSMark Fasheh  * vim: noexpandtab sw=8 ts=8 sts=0:
3ccd979bdSMark Fasheh  *
4ccd979bdSMark Fasheh  * extent_map.c
5ccd979bdSMark Fasheh  *
6363041a5SMark Fasheh  * Block/Cluster mapping functions
7ccd979bdSMark Fasheh  *
8ccd979bdSMark Fasheh  * Copyright (C) 2004 Oracle.  All rights reserved.
9ccd979bdSMark Fasheh  *
10ccd979bdSMark Fasheh  * This program is free software; you can redistribute it and/or
11ccd979bdSMark Fasheh  * modify it under the terms of the GNU General Public
12ccd979bdSMark Fasheh  * License, version 2,  as published by the Free Software Foundation.
13ccd979bdSMark Fasheh  *
14ccd979bdSMark Fasheh  * This program is distributed in the hope that it will be useful,
15ccd979bdSMark Fasheh  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16ccd979bdSMark Fasheh  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17ccd979bdSMark Fasheh  * General Public License for more details.
18ccd979bdSMark Fasheh  *
19ccd979bdSMark Fasheh  * You should have received a copy of the GNU General Public
20ccd979bdSMark Fasheh  * License along with this program; if not, write to the
21ccd979bdSMark Fasheh  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
23ccd979bdSMark Fasheh  */
24ccd979bdSMark Fasheh 
25ccd979bdSMark Fasheh #include <linux/fs.h>
26ccd979bdSMark Fasheh #include <linux/init.h>
27*5a0e3ad6STejun Heo #include <linux/slab.h>
28ccd979bdSMark Fasheh #include <linux/types.h>
2900dc417fSMark Fasheh #include <linux/fiemap.h>
30ccd979bdSMark Fasheh 
31ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_EXTENT_MAP
32ccd979bdSMark Fasheh #include <cluster/masklog.h>
33ccd979bdSMark Fasheh 
34ccd979bdSMark Fasheh #include "ocfs2.h"
35ccd979bdSMark Fasheh 
36363041a5SMark Fasheh #include "alloc.h"
3700dc417fSMark Fasheh #include "dlmglue.h"
38ccd979bdSMark Fasheh #include "extent_map.h"
39ccd979bdSMark Fasheh #include "inode.h"
40ccd979bdSMark Fasheh #include "super.h"
4186239d59STristan Ye #include "symlink.h"
42ccd979bdSMark Fasheh 
43ccd979bdSMark Fasheh #include "buffer_head_io.h"
44ccd979bdSMark Fasheh 
45ccd979bdSMark Fasheh /*
4683418978SMark Fasheh  * The extent caching implementation is intentionally trivial.
4783418978SMark Fasheh  *
4883418978SMark Fasheh  * We only cache a small number of extents stored directly on the
4983418978SMark Fasheh  * inode, so linear order operations are acceptable. If we ever want
5083418978SMark Fasheh  * to increase the size of the extent map, then these algorithms must
5183418978SMark Fasheh  * get smarter.
5283418978SMark Fasheh  */
5383418978SMark Fasheh 
5483418978SMark Fasheh void ocfs2_extent_map_init(struct inode *inode)
5583418978SMark Fasheh {
5683418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
5783418978SMark Fasheh 
5883418978SMark Fasheh 	oi->ip_extent_map.em_num_items = 0;
5983418978SMark Fasheh 	INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
6083418978SMark Fasheh }
6183418978SMark Fasheh 
6283418978SMark Fasheh static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
6383418978SMark Fasheh 				      unsigned int cpos,
6483418978SMark Fasheh 				      struct ocfs2_extent_map_item **ret_emi)
6583418978SMark Fasheh {
6683418978SMark Fasheh 	unsigned int range;
6783418978SMark Fasheh 	struct ocfs2_extent_map_item *emi;
6883418978SMark Fasheh 
6983418978SMark Fasheh 	*ret_emi = NULL;
7083418978SMark Fasheh 
7183418978SMark Fasheh 	list_for_each_entry(emi, &em->em_list, ei_list) {
7283418978SMark Fasheh 		range = emi->ei_cpos + emi->ei_clusters;
7383418978SMark Fasheh 
7483418978SMark Fasheh 		if (cpos >= emi->ei_cpos && cpos < range) {
7583418978SMark Fasheh 			list_move(&emi->ei_list, &em->em_list);
7683418978SMark Fasheh 
7783418978SMark Fasheh 			*ret_emi = emi;
7883418978SMark Fasheh 			break;
7983418978SMark Fasheh 		}
8083418978SMark Fasheh 	}
8183418978SMark Fasheh }
8283418978SMark Fasheh 
8383418978SMark Fasheh static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
8483418978SMark Fasheh 				   unsigned int *phys, unsigned int *len,
8583418978SMark Fasheh 				   unsigned int *flags)
8683418978SMark Fasheh {
8783418978SMark Fasheh 	unsigned int coff;
8883418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
8983418978SMark Fasheh 	struct ocfs2_extent_map_item *emi;
9083418978SMark Fasheh 
9183418978SMark Fasheh 	spin_lock(&oi->ip_lock);
9283418978SMark Fasheh 
9383418978SMark Fasheh 	__ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
9483418978SMark Fasheh 	if (emi) {
9583418978SMark Fasheh 		coff = cpos - emi->ei_cpos;
9683418978SMark Fasheh 		*phys = emi->ei_phys + coff;
9783418978SMark Fasheh 		if (len)
9883418978SMark Fasheh 			*len = emi->ei_clusters - coff;
9983418978SMark Fasheh 		if (flags)
10083418978SMark Fasheh 			*flags = emi->ei_flags;
10183418978SMark Fasheh 	}
10283418978SMark Fasheh 
10383418978SMark Fasheh 	spin_unlock(&oi->ip_lock);
10483418978SMark Fasheh 
10583418978SMark Fasheh 	if (emi == NULL)
10683418978SMark Fasheh 		return -ENOENT;
10783418978SMark Fasheh 
10883418978SMark Fasheh 	return 0;
10983418978SMark Fasheh }
11083418978SMark Fasheh 
11183418978SMark Fasheh /*
11283418978SMark Fasheh  * Forget about all clusters equal to or greater than cpos.
11383418978SMark Fasheh  */
11483418978SMark Fasheh void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
11583418978SMark Fasheh {
116800deef3SChristoph Hellwig 	struct ocfs2_extent_map_item *emi, *n;
11783418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
11883418978SMark Fasheh 	struct ocfs2_extent_map *em = &oi->ip_extent_map;
11983418978SMark Fasheh 	LIST_HEAD(tmp_list);
12083418978SMark Fasheh 	unsigned int range;
12183418978SMark Fasheh 
12283418978SMark Fasheh 	spin_lock(&oi->ip_lock);
123800deef3SChristoph Hellwig 	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
12483418978SMark Fasheh 		if (emi->ei_cpos >= cpos) {
12583418978SMark Fasheh 			/* Full truncate of this record. */
12683418978SMark Fasheh 			list_move(&emi->ei_list, &tmp_list);
12783418978SMark Fasheh 			BUG_ON(em->em_num_items == 0);
12883418978SMark Fasheh 			em->em_num_items--;
12983418978SMark Fasheh 			continue;
13083418978SMark Fasheh 		}
13183418978SMark Fasheh 
13283418978SMark Fasheh 		range = emi->ei_cpos + emi->ei_clusters;
13383418978SMark Fasheh 		if (range > cpos) {
13483418978SMark Fasheh 			/* Partial truncate */
13583418978SMark Fasheh 			emi->ei_clusters = cpos - emi->ei_cpos;
13683418978SMark Fasheh 		}
13783418978SMark Fasheh 	}
13883418978SMark Fasheh 	spin_unlock(&oi->ip_lock);
13983418978SMark Fasheh 
140800deef3SChristoph Hellwig 	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
14183418978SMark Fasheh 		list_del(&emi->ei_list);
14283418978SMark Fasheh 		kfree(emi);
14383418978SMark Fasheh 	}
14483418978SMark Fasheh }
14583418978SMark Fasheh 
14683418978SMark Fasheh /*
14783418978SMark Fasheh  * Is any part of emi2 contained within emi1
14883418978SMark Fasheh  */
14983418978SMark Fasheh static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
15083418978SMark Fasheh 				 struct ocfs2_extent_map_item *emi2)
15183418978SMark Fasheh {
15283418978SMark Fasheh 	unsigned int range1, range2;
15383418978SMark Fasheh 
15483418978SMark Fasheh 	/*
15583418978SMark Fasheh 	 * Check if logical start of emi2 is inside emi1
15683418978SMark Fasheh 	 */
15783418978SMark Fasheh 	range1 = emi1->ei_cpos + emi1->ei_clusters;
15883418978SMark Fasheh 	if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
15983418978SMark Fasheh 		return 1;
16083418978SMark Fasheh 
16183418978SMark Fasheh 	/*
16283418978SMark Fasheh 	 * Check if logical end of emi2 is inside emi1
16383418978SMark Fasheh 	 */
16483418978SMark Fasheh 	range2 = emi2->ei_cpos + emi2->ei_clusters;
16583418978SMark Fasheh 	if (range2 > emi1->ei_cpos && range2 <= range1)
16683418978SMark Fasheh 		return 1;
16783418978SMark Fasheh 
16883418978SMark Fasheh 	return 0;
16983418978SMark Fasheh }
17083418978SMark Fasheh 
17183418978SMark Fasheh static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
17283418978SMark Fasheh 				  struct ocfs2_extent_map_item *src)
17383418978SMark Fasheh {
17483418978SMark Fasheh 	dest->ei_cpos = src->ei_cpos;
17583418978SMark Fasheh 	dest->ei_phys = src->ei_phys;
17683418978SMark Fasheh 	dest->ei_clusters = src->ei_clusters;
17783418978SMark Fasheh 	dest->ei_flags = src->ei_flags;
17883418978SMark Fasheh }
17983418978SMark Fasheh 
18083418978SMark Fasheh /*
18183418978SMark Fasheh  * Try to merge emi with ins. Returns 1 if merge succeeds, zero
18283418978SMark Fasheh  * otherwise.
18383418978SMark Fasheh  */
18483418978SMark Fasheh static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
18583418978SMark Fasheh 					 struct ocfs2_extent_map_item *ins)
18683418978SMark Fasheh {
18783418978SMark Fasheh 	/*
18883418978SMark Fasheh 	 * Handle contiguousness
18983418978SMark Fasheh 	 */
19083418978SMark Fasheh 	if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
19183418978SMark Fasheh 	    ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
19283418978SMark Fasheh 	    ins->ei_flags == emi->ei_flags) {
19383418978SMark Fasheh 		emi->ei_clusters += ins->ei_clusters;
19483418978SMark Fasheh 		return 1;
19583418978SMark Fasheh 	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
196bd6b0bf8SRoel Kluin 		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
19783418978SMark Fasheh 		   ins->ei_flags == emi->ei_flags) {
19883418978SMark Fasheh 		emi->ei_phys = ins->ei_phys;
19983418978SMark Fasheh 		emi->ei_cpos = ins->ei_cpos;
20083418978SMark Fasheh 		emi->ei_clusters += ins->ei_clusters;
20183418978SMark Fasheh 		return 1;
20283418978SMark Fasheh 	}
20383418978SMark Fasheh 
20483418978SMark Fasheh 	/*
20583418978SMark Fasheh 	 * Overlapping extents - this shouldn't happen unless we've
20683418978SMark Fasheh 	 * split an extent to change it's flags. That is exceedingly
20783418978SMark Fasheh 	 * rare, so there's no sense in trying to optimize it yet.
20883418978SMark Fasheh 	 */
20983418978SMark Fasheh 	if (ocfs2_ei_is_contained(emi, ins) ||
21083418978SMark Fasheh 	    ocfs2_ei_is_contained(ins, emi)) {
21183418978SMark Fasheh 		ocfs2_copy_emi_fields(emi, ins);
21283418978SMark Fasheh 		return 1;
21383418978SMark Fasheh 	}
21483418978SMark Fasheh 
21583418978SMark Fasheh 	/* No merge was possible. */
21683418978SMark Fasheh 	return 0;
21783418978SMark Fasheh }
21883418978SMark Fasheh 
21983418978SMark Fasheh /*
22083418978SMark Fasheh  * In order to reduce complexity on the caller, this insert function
22183418978SMark Fasheh  * is intentionally liberal in what it will accept.
22283418978SMark Fasheh  *
22383418978SMark Fasheh  * The only rule is that the truncate call *must* be used whenever
22483418978SMark Fasheh  * records have been deleted. This avoids inserting overlapping
22583418978SMark Fasheh  * records with different physical mappings.
22683418978SMark Fasheh  */
22783418978SMark Fasheh void ocfs2_extent_map_insert_rec(struct inode *inode,
22883418978SMark Fasheh 				 struct ocfs2_extent_rec *rec)
22983418978SMark Fasheh {
23083418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
23183418978SMark Fasheh 	struct ocfs2_extent_map *em = &oi->ip_extent_map;
23283418978SMark Fasheh 	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
23383418978SMark Fasheh 	struct ocfs2_extent_map_item ins;
23483418978SMark Fasheh 
23583418978SMark Fasheh 	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
23683418978SMark Fasheh 	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
23783418978SMark Fasheh 					       le64_to_cpu(rec->e_blkno));
23883418978SMark Fasheh 	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
23983418978SMark Fasheh 	ins.ei_flags = rec->e_flags;
24083418978SMark Fasheh 
24183418978SMark Fasheh search:
24283418978SMark Fasheh 	spin_lock(&oi->ip_lock);
24383418978SMark Fasheh 
24483418978SMark Fasheh 	list_for_each_entry(emi, &em->em_list, ei_list) {
24583418978SMark Fasheh 		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
24683418978SMark Fasheh 			list_move(&emi->ei_list, &em->em_list);
24783418978SMark Fasheh 			spin_unlock(&oi->ip_lock);
24883418978SMark Fasheh 			goto out;
24983418978SMark Fasheh 		}
25083418978SMark Fasheh 	}
25183418978SMark Fasheh 
25283418978SMark Fasheh 	/*
25383418978SMark Fasheh 	 * No item could be merged.
25483418978SMark Fasheh 	 *
25583418978SMark Fasheh 	 * Either allocate and add a new item, or overwrite the last recently
25683418978SMark Fasheh 	 * inserted.
25783418978SMark Fasheh 	 */
25883418978SMark Fasheh 
25983418978SMark Fasheh 	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
26083418978SMark Fasheh 		if (new_emi == NULL) {
26183418978SMark Fasheh 			spin_unlock(&oi->ip_lock);
26283418978SMark Fasheh 
26383418978SMark Fasheh 			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
26483418978SMark Fasheh 			if (new_emi == NULL)
26583418978SMark Fasheh 				goto out;
26683418978SMark Fasheh 
26783418978SMark Fasheh 			goto search;
26883418978SMark Fasheh 		}
26983418978SMark Fasheh 
27083418978SMark Fasheh 		ocfs2_copy_emi_fields(new_emi, &ins);
27183418978SMark Fasheh 		list_add(&new_emi->ei_list, &em->em_list);
27283418978SMark Fasheh 		em->em_num_items++;
27383418978SMark Fasheh 		new_emi = NULL;
27483418978SMark Fasheh 	} else {
27583418978SMark Fasheh 		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
27683418978SMark Fasheh 		emi = list_entry(em->em_list.prev,
27783418978SMark Fasheh 				 struct ocfs2_extent_map_item, ei_list);
27883418978SMark Fasheh 		list_move(&emi->ei_list, &em->em_list);
27983418978SMark Fasheh 		ocfs2_copy_emi_fields(emi, &ins);
28083418978SMark Fasheh 	}
28183418978SMark Fasheh 
28283418978SMark Fasheh 	spin_unlock(&oi->ip_lock);
28383418978SMark Fasheh 
28483418978SMark Fasheh out:
28583418978SMark Fasheh 	if (new_emi)
28683418978SMark Fasheh 		kfree(new_emi);
28783418978SMark Fasheh }
28883418978SMark Fasheh 
28900dc417fSMark Fasheh static int ocfs2_last_eb_is_empty(struct inode *inode,
29000dc417fSMark Fasheh 				  struct ocfs2_dinode *di)
29100dc417fSMark Fasheh {
29200dc417fSMark Fasheh 	int ret, next_free;
29300dc417fSMark Fasheh 	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
29400dc417fSMark Fasheh 	struct buffer_head *eb_bh = NULL;
29500dc417fSMark Fasheh 	struct ocfs2_extent_block *eb;
29600dc417fSMark Fasheh 	struct ocfs2_extent_list *el;
29700dc417fSMark Fasheh 
2983d03a305SJoel Becker 	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
29900dc417fSMark Fasheh 	if (ret) {
30000dc417fSMark Fasheh 		mlog_errno(ret);
30100dc417fSMark Fasheh 		goto out;
30200dc417fSMark Fasheh 	}
30300dc417fSMark Fasheh 
30400dc417fSMark Fasheh 	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
30500dc417fSMark Fasheh 	el = &eb->h_list;
30600dc417fSMark Fasheh 
30700dc417fSMark Fasheh 	if (el->l_tree_depth) {
30800dc417fSMark Fasheh 		ocfs2_error(inode->i_sb,
30900dc417fSMark Fasheh 			    "Inode %lu has non zero tree depth in "
31000dc417fSMark Fasheh 			    "leaf block %llu\n", inode->i_ino,
31100dc417fSMark Fasheh 			    (unsigned long long)eb_bh->b_blocknr);
31200dc417fSMark Fasheh 		ret = -EROFS;
31300dc417fSMark Fasheh 		goto out;
31400dc417fSMark Fasheh 	}
31500dc417fSMark Fasheh 
31600dc417fSMark Fasheh 	next_free = le16_to_cpu(el->l_next_free_rec);
31700dc417fSMark Fasheh 
31800dc417fSMark Fasheh 	if (next_free == 0 ||
31900dc417fSMark Fasheh 	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
32000dc417fSMark Fasheh 		ret = 1;
32100dc417fSMark Fasheh 
32200dc417fSMark Fasheh out:
32300dc417fSMark Fasheh 	brelse(eb_bh);
32400dc417fSMark Fasheh 	return ret;
32500dc417fSMark Fasheh }
32600dc417fSMark Fasheh 
32783418978SMark Fasheh /*
3284f902c37SMark Fasheh  * Return the 1st index within el which contains an extent start
3294f902c37SMark Fasheh  * larger than v_cluster.
3304f902c37SMark Fasheh  */
3314f902c37SMark Fasheh static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
3324f902c37SMark Fasheh 				       u32 v_cluster)
3334f902c37SMark Fasheh {
3344f902c37SMark Fasheh 	int i;
3354f902c37SMark Fasheh 	struct ocfs2_extent_rec *rec;
3364f902c37SMark Fasheh 
3374f902c37SMark Fasheh 	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
3384f902c37SMark Fasheh 		rec = &el->l_recs[i];
3394f902c37SMark Fasheh 
3404f902c37SMark Fasheh 		if (v_cluster < le32_to_cpu(rec->e_cpos))
3414f902c37SMark Fasheh 			break;
3424f902c37SMark Fasheh 	}
3434f902c37SMark Fasheh 
3444f902c37SMark Fasheh 	return i;
3454f902c37SMark Fasheh }
3464f902c37SMark Fasheh 
3474f902c37SMark Fasheh /*
3484f902c37SMark Fasheh  * Figure out the size of a hole which starts at v_cluster within the given
3494f902c37SMark Fasheh  * extent list.
3504f902c37SMark Fasheh  *
3514f902c37SMark Fasheh  * If there is no more allocation past v_cluster, we return the maximum
3524f902c37SMark Fasheh  * cluster size minus v_cluster.
3534f902c37SMark Fasheh  *
3544f902c37SMark Fasheh  * If we have in-inode extents, then el points to the dinode list and
3554f902c37SMark Fasheh  * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
3564f902c37SMark Fasheh  * containing el.
3574f902c37SMark Fasheh  */
358e73a819dSTao Ma int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
3594f902c37SMark Fasheh 			       struct ocfs2_extent_list *el,
3604f902c37SMark Fasheh 			       struct buffer_head *eb_bh,
3614f902c37SMark Fasheh 			       u32 v_cluster,
3624f902c37SMark Fasheh 			       u32 *num_clusters)
3634f902c37SMark Fasheh {
3644f902c37SMark Fasheh 	int ret, i;
3654f902c37SMark Fasheh 	struct buffer_head *next_eb_bh = NULL;
3664f902c37SMark Fasheh 	struct ocfs2_extent_block *eb, *next_eb;
3674f902c37SMark Fasheh 
3684f902c37SMark Fasheh 	i = ocfs2_search_for_hole_index(el, v_cluster);
3694f902c37SMark Fasheh 
3704f902c37SMark Fasheh 	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
3714f902c37SMark Fasheh 		eb = (struct ocfs2_extent_block *)eb_bh->b_data;
3724f902c37SMark Fasheh 
3734f902c37SMark Fasheh 		/*
3744f902c37SMark Fasheh 		 * Check the next leaf for any extents.
3754f902c37SMark Fasheh 		 */
3764f902c37SMark Fasheh 
3774f902c37SMark Fasheh 		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
3784f902c37SMark Fasheh 			goto no_more_extents;
3794f902c37SMark Fasheh 
380e73a819dSTao Ma 		ret = ocfs2_read_extent_block(ci,
3814f902c37SMark Fasheh 					      le64_to_cpu(eb->h_next_leaf_blk),
3820fcaa56aSJoel Becker 					      &next_eb_bh);
3834f902c37SMark Fasheh 		if (ret) {
3844f902c37SMark Fasheh 			mlog_errno(ret);
3854f902c37SMark Fasheh 			goto out;
3864f902c37SMark Fasheh 		}
3875e96581aSJoel Becker 
3884f902c37SMark Fasheh 		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
3894f902c37SMark Fasheh 		el = &next_eb->h_list;
3904f902c37SMark Fasheh 		i = ocfs2_search_for_hole_index(el, v_cluster);
3914f902c37SMark Fasheh 	}
3924f902c37SMark Fasheh 
3934f902c37SMark Fasheh no_more_extents:
3944f902c37SMark Fasheh 	if (i == le16_to_cpu(el->l_next_free_rec)) {
3954f902c37SMark Fasheh 		/*
3964f902c37SMark Fasheh 		 * We're at the end of our existing allocation. Just
3974f902c37SMark Fasheh 		 * return the maximum number of clusters we could
3984f902c37SMark Fasheh 		 * possibly allocate.
3994f902c37SMark Fasheh 		 */
4004f902c37SMark Fasheh 		*num_clusters = UINT_MAX - v_cluster;
4014f902c37SMark Fasheh 	} else {
4024f902c37SMark Fasheh 		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
4034f902c37SMark Fasheh 	}
4044f902c37SMark Fasheh 
4054f902c37SMark Fasheh 	ret = 0;
4064f902c37SMark Fasheh out:
4074f902c37SMark Fasheh 	brelse(next_eb_bh);
4084f902c37SMark Fasheh 	return ret;
4094f902c37SMark Fasheh }
4104f902c37SMark Fasheh 
41100dc417fSMark Fasheh static int ocfs2_get_clusters_nocache(struct inode *inode,
41200dc417fSMark Fasheh 				      struct buffer_head *di_bh,
41300dc417fSMark Fasheh 				      u32 v_cluster, unsigned int *hole_len,
41400dc417fSMark Fasheh 				      struct ocfs2_extent_rec *ret_rec,
41500dc417fSMark Fasheh 				      unsigned int *is_last)
416ccd979bdSMark Fasheh {
41700dc417fSMark Fasheh 	int i, ret, tree_height, len;
418ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
41900dc417fSMark Fasheh 	struct ocfs2_extent_block *uninitialized_var(eb);
420ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
421ccd979bdSMark Fasheh 	struct ocfs2_extent_rec *rec;
42200dc417fSMark Fasheh 	struct buffer_head *eb_bh = NULL;
423ccd979bdSMark Fasheh 
42400dc417fSMark Fasheh 	memset(ret_rec, 0, sizeof(*ret_rec));
42500dc417fSMark Fasheh 	if (is_last)
42600dc417fSMark Fasheh 		*is_last = 0;
427363041a5SMark Fasheh 
428363041a5SMark Fasheh 	di = (struct ocfs2_dinode *) di_bh->b_data;
429363041a5SMark Fasheh 	el = &di->id2.i_list;
43000dc417fSMark Fasheh 	tree_height = le16_to_cpu(el->l_tree_depth);
431363041a5SMark Fasheh 
43200dc417fSMark Fasheh 	if (tree_height > 0) {
433facdb77fSJoel Becker 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
434facdb77fSJoel Becker 				      &eb_bh);
435363041a5SMark Fasheh 		if (ret) {
436363041a5SMark Fasheh 			mlog_errno(ret);
437363041a5SMark Fasheh 			goto out;
438363041a5SMark Fasheh 		}
439363041a5SMark Fasheh 
440363041a5SMark Fasheh 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
441363041a5SMark Fasheh 		el = &eb->h_list;
442e48edee2SMark Fasheh 
443e48edee2SMark Fasheh 		if (el->l_tree_depth) {
444e48edee2SMark Fasheh 			ocfs2_error(inode->i_sb,
445e48edee2SMark Fasheh 				    "Inode %lu has non zero tree depth in "
446e48edee2SMark Fasheh 				    "leaf block %llu\n", inode->i_ino,
447e48edee2SMark Fasheh 				    (unsigned long long)eb_bh->b_blocknr);
448e48edee2SMark Fasheh 			ret = -EROFS;
449e48edee2SMark Fasheh 			goto out;
450e48edee2SMark Fasheh 		}
451363041a5SMark Fasheh 	}
452363041a5SMark Fasheh 
453363041a5SMark Fasheh 	i = ocfs2_search_extent_list(el, v_cluster);
454363041a5SMark Fasheh 	if (i == -1) {
455363041a5SMark Fasheh 		/*
45600dc417fSMark Fasheh 		 * Holes can be larger than the maximum size of an
4573ad2f3fbSDaniel Mack 		 * extent, so we return their lengths in a separate
45800dc417fSMark Fasheh 		 * field.
459363041a5SMark Fasheh 		 */
46000dc417fSMark Fasheh 		if (hole_len) {
461e73a819dSTao Ma 			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
462e73a819dSTao Ma 							 el, eb_bh,
46300dc417fSMark Fasheh 							 v_cluster, &len);
4644f902c37SMark Fasheh 			if (ret) {
4654f902c37SMark Fasheh 				mlog_errno(ret);
4664f902c37SMark Fasheh 				goto out;
4674f902c37SMark Fasheh 			}
46800dc417fSMark Fasheh 
46900dc417fSMark Fasheh 			*hole_len = len;
4704f902c37SMark Fasheh 		}
47100dc417fSMark Fasheh 		goto out_hole;
47200dc417fSMark Fasheh 	}
47300dc417fSMark Fasheh 
474363041a5SMark Fasheh 	rec = &el->l_recs[i];
475363041a5SMark Fasheh 
476363041a5SMark Fasheh 	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
477363041a5SMark Fasheh 
478363041a5SMark Fasheh 	if (!rec->e_blkno) {
479363041a5SMark Fasheh 		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
480363041a5SMark Fasheh 			    "record (%u, %u, 0)", inode->i_ino,
481363041a5SMark Fasheh 			    le32_to_cpu(rec->e_cpos),
482e48edee2SMark Fasheh 			    ocfs2_rec_clusters(el, rec));
483363041a5SMark Fasheh 		ret = -EROFS;
484363041a5SMark Fasheh 		goto out;
485363041a5SMark Fasheh 	}
486363041a5SMark Fasheh 
48700dc417fSMark Fasheh 	*ret_rec = *rec;
488363041a5SMark Fasheh 
48900dc417fSMark Fasheh 	/*
49000dc417fSMark Fasheh 	 * Checking for last extent is potentially expensive - we
49100dc417fSMark Fasheh 	 * might have to look at the next leaf over to see if it's
49200dc417fSMark Fasheh 	 * empty.
49300dc417fSMark Fasheh 	 *
49400dc417fSMark Fasheh 	 * The first two checks are to see whether the caller even
49500dc417fSMark Fasheh 	 * cares for this information, and if the extent is at least
49600dc417fSMark Fasheh 	 * the last in it's list.
49700dc417fSMark Fasheh 	 *
49800dc417fSMark Fasheh 	 * If those hold true, then the extent is last if any of the
49900dc417fSMark Fasheh 	 * additional conditions hold true:
50000dc417fSMark Fasheh 	 *  - Extent list is in-inode
50100dc417fSMark Fasheh 	 *  - Extent list is right-most
50200dc417fSMark Fasheh 	 *  - Extent list is 2nd to rightmost, with empty right-most
50300dc417fSMark Fasheh 	 */
50400dc417fSMark Fasheh 	if (is_last) {
50500dc417fSMark Fasheh 		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
50600dc417fSMark Fasheh 			if (tree_height == 0)
50700dc417fSMark Fasheh 				*is_last = 1;
50800dc417fSMark Fasheh 			else if (eb->h_blkno == di->i_last_eb_blk)
50900dc417fSMark Fasheh 				*is_last = 1;
51000dc417fSMark Fasheh 			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
51100dc417fSMark Fasheh 				ret = ocfs2_last_eb_is_empty(inode, di);
51200dc417fSMark Fasheh 				if (ret < 0) {
51300dc417fSMark Fasheh 					mlog_errno(ret);
51400dc417fSMark Fasheh 					goto out;
51500dc417fSMark Fasheh 				}
51600dc417fSMark Fasheh 				if (ret == 1)
51700dc417fSMark Fasheh 					*is_last = 1;
51800dc417fSMark Fasheh 			}
51900dc417fSMark Fasheh 		}
52000dc417fSMark Fasheh 	}
52100dc417fSMark Fasheh 
52200dc417fSMark Fasheh out_hole:
52300dc417fSMark Fasheh 	ret = 0;
52400dc417fSMark Fasheh out:
52500dc417fSMark Fasheh 	brelse(eb_bh);
52600dc417fSMark Fasheh 	return ret;
52700dc417fSMark Fasheh }
52800dc417fSMark Fasheh 
52900dc417fSMark Fasheh static void ocfs2_relative_extent_offsets(struct super_block *sb,
53000dc417fSMark Fasheh 					  u32 v_cluster,
53100dc417fSMark Fasheh 					  struct ocfs2_extent_rec *rec,
53200dc417fSMark Fasheh 					  u32 *p_cluster, u32 *num_clusters)
53300dc417fSMark Fasheh 
53400dc417fSMark Fasheh {
53500dc417fSMark Fasheh 	u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
53600dc417fSMark Fasheh 
53700dc417fSMark Fasheh 	*p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
538363041a5SMark Fasheh 	*p_cluster = *p_cluster + coff;
539363041a5SMark Fasheh 
540363041a5SMark Fasheh 	if (num_clusters)
54100dc417fSMark Fasheh 		*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
54200dc417fSMark Fasheh }
54349cb8d2dSMark Fasheh 
544f56654c4STao Ma int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
545f56654c4STao Ma 			     u32 *p_cluster, u32 *num_clusters,
5461061f9c1STao Ma 			     struct ocfs2_extent_list *el,
5471061f9c1STao Ma 			     unsigned int *extent_flags)
548f56654c4STao Ma {
549f56654c4STao Ma 	int ret = 0, i;
550f56654c4STao Ma 	struct buffer_head *eb_bh = NULL;
551f56654c4STao Ma 	struct ocfs2_extent_block *eb;
552f56654c4STao Ma 	struct ocfs2_extent_rec *rec;
553f56654c4STao Ma 	u32 coff;
554f56654c4STao Ma 
555f56654c4STao Ma 	if (el->l_tree_depth) {
556facdb77fSJoel Becker 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
557facdb77fSJoel Becker 				      &eb_bh);
558f56654c4STao Ma 		if (ret) {
559f56654c4STao Ma 			mlog_errno(ret);
560f56654c4STao Ma 			goto out;
561f56654c4STao Ma 		}
562f56654c4STao Ma 
563f56654c4STao Ma 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
564f56654c4STao Ma 		el = &eb->h_list;
565f56654c4STao Ma 
566f56654c4STao Ma 		if (el->l_tree_depth) {
567f56654c4STao Ma 			ocfs2_error(inode->i_sb,
568f56654c4STao Ma 				    "Inode %lu has non zero tree depth in "
569f56654c4STao Ma 				    "xattr leaf block %llu\n", inode->i_ino,
570f56654c4STao Ma 				    (unsigned long long)eb_bh->b_blocknr);
571f56654c4STao Ma 			ret = -EROFS;
572f56654c4STao Ma 			goto out;
573f56654c4STao Ma 		}
574f56654c4STao Ma 	}
575f56654c4STao Ma 
576f56654c4STao Ma 	i = ocfs2_search_extent_list(el, v_cluster);
577f56654c4STao Ma 	if (i == -1) {
578f56654c4STao Ma 		ret = -EROFS;
579f56654c4STao Ma 		mlog_errno(ret);
580f56654c4STao Ma 		goto out;
581f56654c4STao Ma 	} else {
582f56654c4STao Ma 		rec = &el->l_recs[i];
583f56654c4STao Ma 		BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
584f56654c4STao Ma 
585f56654c4STao Ma 		if (!rec->e_blkno) {
586f56654c4STao Ma 			ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
587f56654c4STao Ma 				    "record (%u, %u, 0) in xattr", inode->i_ino,
588f56654c4STao Ma 				    le32_to_cpu(rec->e_cpos),
589f56654c4STao Ma 				    ocfs2_rec_clusters(el, rec));
590f56654c4STao Ma 			ret = -EROFS;
591f56654c4STao Ma 			goto out;
592f56654c4STao Ma 		}
593f56654c4STao Ma 		coff = v_cluster - le32_to_cpu(rec->e_cpos);
594f56654c4STao Ma 		*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
595f56654c4STao Ma 						    le64_to_cpu(rec->e_blkno));
596f56654c4STao Ma 		*p_cluster = *p_cluster + coff;
597f56654c4STao Ma 		if (num_clusters)
598f56654c4STao Ma 			*num_clusters = ocfs2_rec_clusters(el, rec) - coff;
5991061f9c1STao Ma 
6001061f9c1STao Ma 		if (extent_flags)
6011061f9c1STao Ma 			*extent_flags = rec->e_flags;
602f56654c4STao Ma 	}
603f56654c4STao Ma out:
604f56654c4STao Ma 	if (eb_bh)
605f56654c4STao Ma 		brelse(eb_bh);
606f56654c4STao Ma 	return ret;
607f56654c4STao Ma }
608f56654c4STao Ma 
60900dc417fSMark Fasheh int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
61000dc417fSMark Fasheh 		       u32 *p_cluster, u32 *num_clusters,
61100dc417fSMark Fasheh 		       unsigned int *extent_flags)
61200dc417fSMark Fasheh {
61300dc417fSMark Fasheh 	int ret;
61400dc417fSMark Fasheh 	unsigned int uninitialized_var(hole_len), flags = 0;
61500dc417fSMark Fasheh 	struct buffer_head *di_bh = NULL;
61600dc417fSMark Fasheh 	struct ocfs2_extent_rec rec;
61783418978SMark Fasheh 
61800dc417fSMark Fasheh 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
61900dc417fSMark Fasheh 		ret = -ERANGE;
62000dc417fSMark Fasheh 		mlog_errno(ret);
62100dc417fSMark Fasheh 		goto out;
62200dc417fSMark Fasheh 	}
62300dc417fSMark Fasheh 
62400dc417fSMark Fasheh 	ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
62500dc417fSMark Fasheh 				      num_clusters, extent_flags);
62600dc417fSMark Fasheh 	if (ret == 0)
62700dc417fSMark Fasheh 		goto out;
62800dc417fSMark Fasheh 
629b657c95cSJoel Becker 	ret = ocfs2_read_inode_block(inode, &di_bh);
63000dc417fSMark Fasheh 	if (ret) {
63100dc417fSMark Fasheh 		mlog_errno(ret);
63200dc417fSMark Fasheh 		goto out;
63300dc417fSMark Fasheh 	}
63400dc417fSMark Fasheh 
63500dc417fSMark Fasheh 	ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
63600dc417fSMark Fasheh 					 &rec, NULL);
63700dc417fSMark Fasheh 	if (ret) {
63800dc417fSMark Fasheh 		mlog_errno(ret);
63900dc417fSMark Fasheh 		goto out;
64000dc417fSMark Fasheh 	}
64100dc417fSMark Fasheh 
64200dc417fSMark Fasheh 	if (rec.e_blkno == 0ULL) {
64300dc417fSMark Fasheh 		/*
64400dc417fSMark Fasheh 		 * A hole was found. Return some canned values that
64500dc417fSMark Fasheh 		 * callers can key on. If asked for, num_clusters will
64600dc417fSMark Fasheh 		 * be populated with the size of the hole.
64700dc417fSMark Fasheh 		 */
64800dc417fSMark Fasheh 		*p_cluster = 0;
64900dc417fSMark Fasheh 		if (num_clusters) {
65000dc417fSMark Fasheh 			*num_clusters = hole_len;
65100dc417fSMark Fasheh 		}
65200dc417fSMark Fasheh 	} else {
65300dc417fSMark Fasheh 		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
65400dc417fSMark Fasheh 					      p_cluster, num_clusters);
65500dc417fSMark Fasheh 		flags = rec.e_flags;
65600dc417fSMark Fasheh 
65700dc417fSMark Fasheh 		ocfs2_extent_map_insert_rec(inode, &rec);
658363041a5SMark Fasheh 	}
659363041a5SMark Fasheh 
66049cb8d2dSMark Fasheh 	if (extent_flags)
66149cb8d2dSMark Fasheh 		*extent_flags = flags;
66249cb8d2dSMark Fasheh 
663363041a5SMark Fasheh out:
664363041a5SMark Fasheh 	brelse(di_bh);
665363041a5SMark Fasheh 	return ret;
666363041a5SMark Fasheh }
667363041a5SMark Fasheh 
668363041a5SMark Fasheh /*
669363041a5SMark Fasheh  * This expects alloc_sem to be held. The allocation cannot change at
670363041a5SMark Fasheh  * all while the map is in the process of being updated.
671363041a5SMark Fasheh  */
672363041a5SMark Fasheh int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
6734f902c37SMark Fasheh 				u64 *ret_count, unsigned int *extent_flags)
674363041a5SMark Fasheh {
675363041a5SMark Fasheh 	int ret;
676363041a5SMark Fasheh 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
677363041a5SMark Fasheh 	u32 cpos, num_clusters, p_cluster;
678363041a5SMark Fasheh 	u64 boff = 0;
679ccd979bdSMark Fasheh 
680ccd979bdSMark Fasheh 	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
681ccd979bdSMark Fasheh 
68249cb8d2dSMark Fasheh 	ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
68349cb8d2dSMark Fasheh 				 extent_flags);
684ccd979bdSMark Fasheh 	if (ret) {
685ccd979bdSMark Fasheh 		mlog_errno(ret);
686363041a5SMark Fasheh 		goto out;
687ccd979bdSMark Fasheh 	}
688ccd979bdSMark Fasheh 
689363041a5SMark Fasheh 	/*
690363041a5SMark Fasheh 	 * p_cluster == 0 indicates a hole.
691363041a5SMark Fasheh 	 */
692363041a5SMark Fasheh 	if (p_cluster) {
693363041a5SMark Fasheh 		boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
694ccd979bdSMark Fasheh 		boff += (v_blkno & (u64)(bpc - 1));
695363041a5SMark Fasheh 	}
696363041a5SMark Fasheh 
697363041a5SMark Fasheh 	*p_blkno = boff;
698ccd979bdSMark Fasheh 
699ccd979bdSMark Fasheh 	if (ret_count) {
700363041a5SMark Fasheh 		*ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
701363041a5SMark Fasheh 		*ret_count -= v_blkno & (u64)(bpc - 1);
702ccd979bdSMark Fasheh 	}
703ccd979bdSMark Fasheh 
704363041a5SMark Fasheh out:
705363041a5SMark Fasheh 	return ret;
706ccd979bdSMark Fasheh }
70700dc417fSMark Fasheh 
70886239d59STristan Ye /*
70986239d59STristan Ye  * The ocfs2_fiemap_inline() may be a little bit misleading, since
71086239d59STristan Ye  * it not only handles the fiemap for inlined files, but also deals
71186239d59STristan Ye  * with the fast symlink, cause they have no difference for extent
71286239d59STristan Ye  * mapping per se.
71386239d59STristan Ye  */
71400dc417fSMark Fasheh static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
71500dc417fSMark Fasheh 			       struct fiemap_extent_info *fieinfo,
71600dc417fSMark Fasheh 			       u64 map_start)
71700dc417fSMark Fasheh {
71800dc417fSMark Fasheh 	int ret;
71900dc417fSMark Fasheh 	unsigned int id_count;
72000dc417fSMark Fasheh 	struct ocfs2_dinode *di;
72100dc417fSMark Fasheh 	u64 phys;
72200dc417fSMark Fasheh 	u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
72300dc417fSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
72400dc417fSMark Fasheh 
72500dc417fSMark Fasheh 	di = (struct ocfs2_dinode *)di_bh->b_data;
72686239d59STristan Ye 	if (ocfs2_inode_is_fast_symlink(inode))
72786239d59STristan Ye 		id_count = ocfs2_fast_symlink_chars(inode->i_sb);
72886239d59STristan Ye 	else
72900dc417fSMark Fasheh 		id_count = le16_to_cpu(di->id2.i_data.id_count);
73000dc417fSMark Fasheh 
73100dc417fSMark Fasheh 	if (map_start < id_count) {
73200dc417fSMark Fasheh 		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
73386239d59STristan Ye 		if (ocfs2_inode_is_fast_symlink(inode))
73486239d59STristan Ye 			phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
73586239d59STristan Ye 		else
73686239d59STristan Ye 			phys += offsetof(struct ocfs2_dinode,
73786239d59STristan Ye 					 id2.i_data.id_data);
73800dc417fSMark Fasheh 
73900dc417fSMark Fasheh 		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
74000dc417fSMark Fasheh 					      flags);
74100dc417fSMark Fasheh 		if (ret < 0)
74200dc417fSMark Fasheh 			return ret;
74300dc417fSMark Fasheh 	}
74400dc417fSMark Fasheh 
74500dc417fSMark Fasheh 	return 0;
74600dc417fSMark Fasheh }
74700dc417fSMark Fasheh 
74800dc417fSMark Fasheh #define OCFS2_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC)
74900dc417fSMark Fasheh 
75000dc417fSMark Fasheh int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
75100dc417fSMark Fasheh 		 u64 map_start, u64 map_len)
75200dc417fSMark Fasheh {
75300dc417fSMark Fasheh 	int ret, is_last;
75400dc417fSMark Fasheh 	u32 mapping_end, cpos;
75500dc417fSMark Fasheh 	unsigned int hole_size;
75600dc417fSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
75700dc417fSMark Fasheh 	u64 len_bytes, phys_bytes, virt_bytes;
75800dc417fSMark Fasheh 	struct buffer_head *di_bh = NULL;
75900dc417fSMark Fasheh 	struct ocfs2_extent_rec rec;
76000dc417fSMark Fasheh 
76100dc417fSMark Fasheh 	ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
76200dc417fSMark Fasheh 	if (ret)
76300dc417fSMark Fasheh 		return ret;
76400dc417fSMark Fasheh 
76500dc417fSMark Fasheh 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
76600dc417fSMark Fasheh 	if (ret) {
76700dc417fSMark Fasheh 		mlog_errno(ret);
76800dc417fSMark Fasheh 		goto out;
76900dc417fSMark Fasheh 	}
77000dc417fSMark Fasheh 
77100dc417fSMark Fasheh 	down_read(&OCFS2_I(inode)->ip_alloc_sem);
77200dc417fSMark Fasheh 
77300dc417fSMark Fasheh 	/*
77486239d59STristan Ye 	 * Handle inline-data and fast symlink separately.
77500dc417fSMark Fasheh 	 */
77686239d59STristan Ye 	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
77786239d59STristan Ye 	    ocfs2_inode_is_fast_symlink(inode)) {
77800dc417fSMark Fasheh 		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
77900dc417fSMark Fasheh 		goto out_unlock;
78000dc417fSMark Fasheh 	}
78100dc417fSMark Fasheh 
78200dc417fSMark Fasheh 	cpos = map_start >> osb->s_clustersize_bits;
78300dc417fSMark Fasheh 	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
78400dc417fSMark Fasheh 					       map_start + map_len);
78500dc417fSMark Fasheh 	mapping_end -= cpos;
78600dc417fSMark Fasheh 	is_last = 0;
78700dc417fSMark Fasheh 	while (cpos < mapping_end && !is_last) {
78800dc417fSMark Fasheh 		u32 fe_flags;
78900dc417fSMark Fasheh 
79000dc417fSMark Fasheh 		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
79100dc417fSMark Fasheh 						 &hole_size, &rec, &is_last);
79200dc417fSMark Fasheh 		if (ret) {
79300dc417fSMark Fasheh 			mlog_errno(ret);
79400dc417fSMark Fasheh 			goto out;
79500dc417fSMark Fasheh 		}
79600dc417fSMark Fasheh 
79700dc417fSMark Fasheh 		if (rec.e_blkno == 0ULL) {
79800dc417fSMark Fasheh 			cpos += hole_size;
79900dc417fSMark Fasheh 			continue;
80000dc417fSMark Fasheh 		}
80100dc417fSMark Fasheh 
80200dc417fSMark Fasheh 		fe_flags = 0;
80300dc417fSMark Fasheh 		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
80400dc417fSMark Fasheh 			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
805faf8b70fSSunil Mushran 		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
806faf8b70fSSunil Mushran 			fe_flags |= FIEMAP_EXTENT_SHARED;
80700dc417fSMark Fasheh 		if (is_last)
80800dc417fSMark Fasheh 			fe_flags |= FIEMAP_EXTENT_LAST;
80900dc417fSMark Fasheh 		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
81000dc417fSMark Fasheh 		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
81100dc417fSMark Fasheh 		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
81200dc417fSMark Fasheh 
81300dc417fSMark Fasheh 		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
81400dc417fSMark Fasheh 					      len_bytes, fe_flags);
81500dc417fSMark Fasheh 		if (ret)
81600dc417fSMark Fasheh 			break;
81700dc417fSMark Fasheh 
81800dc417fSMark Fasheh 		cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
81900dc417fSMark Fasheh 	}
82000dc417fSMark Fasheh 
82100dc417fSMark Fasheh 	if (ret > 0)
82200dc417fSMark Fasheh 		ret = 0;
82300dc417fSMark Fasheh 
82400dc417fSMark Fasheh out_unlock:
82500dc417fSMark Fasheh 	brelse(di_bh);
82600dc417fSMark Fasheh 
82700dc417fSMark Fasheh 	up_read(&OCFS2_I(inode)->ip_alloc_sem);
82800dc417fSMark Fasheh 
82900dc417fSMark Fasheh 	ocfs2_inode_unlock(inode, 0);
83000dc417fSMark Fasheh out:
83100dc417fSMark Fasheh 
83200dc417fSMark Fasheh 	return ret;
83300dc417fSMark Fasheh }
834a8549fb5SJoel Becker 
835a8549fb5SJoel Becker int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
836a8549fb5SJoel Becker 			   struct buffer_head *bhs[], int flags,
837a8549fb5SJoel Becker 			   int (*validate)(struct super_block *sb,
838a8549fb5SJoel Becker 					   struct buffer_head *bh))
839a8549fb5SJoel Becker {
840a8549fb5SJoel Becker 	int rc = 0;
841a8549fb5SJoel Becker 	u64 p_block, p_count;
842a8549fb5SJoel Becker 	int i, count, done = 0;
843a8549fb5SJoel Becker 
844a8549fb5SJoel Becker 	mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, "
845a8549fb5SJoel Becker 		   "flags = %x, validate = %p)\n",
846a8549fb5SJoel Becker 		   inode, (unsigned long long)v_block, nr, bhs, flags,
847a8549fb5SJoel Becker 		   validate);
848a8549fb5SJoel Becker 
849a8549fb5SJoel Becker 	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
850a8549fb5SJoel Becker 	    i_size_read(inode)) {
851a8549fb5SJoel Becker 		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
852a8549fb5SJoel Becker 		goto out;
853a8549fb5SJoel Becker 	}
854a8549fb5SJoel Becker 
855a8549fb5SJoel Becker 	while (done < nr) {
856a8549fb5SJoel Becker 		down_read(&OCFS2_I(inode)->ip_alloc_sem);
857a8549fb5SJoel Becker 		rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
858a8549fb5SJoel Becker 						 &p_block, &p_count, NULL);
859a8549fb5SJoel Becker 		up_read(&OCFS2_I(inode)->ip_alloc_sem);
860a8549fb5SJoel Becker 		if (rc) {
861a8549fb5SJoel Becker 			mlog_errno(rc);
862a8549fb5SJoel Becker 			break;
863a8549fb5SJoel Becker 		}
864a8549fb5SJoel Becker 
865a8549fb5SJoel Becker 		if (!p_block) {
866a8549fb5SJoel Becker 			rc = -EIO;
867a8549fb5SJoel Becker 			mlog(ML_ERROR,
868a8549fb5SJoel Becker 			     "Inode #%llu contains a hole at offset %llu\n",
869a8549fb5SJoel Becker 			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
870a8549fb5SJoel Becker 			     (unsigned long long)(v_block + done) <<
871a8549fb5SJoel Becker 			     inode->i_sb->s_blocksize_bits);
872a8549fb5SJoel Becker 			break;
873a8549fb5SJoel Becker 		}
874a8549fb5SJoel Becker 
875a8549fb5SJoel Becker 		count = nr - done;
876a8549fb5SJoel Becker 		if (p_count < count)
877a8549fb5SJoel Becker 			count = p_count;
878a8549fb5SJoel Becker 
879a8549fb5SJoel Becker 		/*
880a8549fb5SJoel Becker 		 * If the caller passed us bhs, they should have come
881a8549fb5SJoel Becker 		 * from a previous readahead call to this function.  Thus,
882a8549fb5SJoel Becker 		 * they should have the right b_blocknr.
883a8549fb5SJoel Becker 		 */
884a8549fb5SJoel Becker 		for (i = 0; i < count; i++) {
885a8549fb5SJoel Becker 			if (!bhs[done + i])
886a8549fb5SJoel Becker 				continue;
887a8549fb5SJoel Becker 			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
888a8549fb5SJoel Becker 		}
889a8549fb5SJoel Becker 
8908cb471e8SJoel Becker 		rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
8918cb471e8SJoel Becker 				       bhs + done, flags, validate);
892a8549fb5SJoel Becker 		if (rc) {
893a8549fb5SJoel Becker 			mlog_errno(rc);
894a8549fb5SJoel Becker 			break;
895a8549fb5SJoel Becker 		}
896a8549fb5SJoel Becker 		done += count;
897a8549fb5SJoel Becker 	}
898a8549fb5SJoel Becker 
899a8549fb5SJoel Becker out:
900a8549fb5SJoel Becker 	mlog_exit(rc);
901a8549fb5SJoel Becker 	return rc;
902a8549fb5SJoel Becker }
903a8549fb5SJoel Becker 
904a8549fb5SJoel Becker 
905