xref: /linux/fs/ocfs2/extent_map.c (revision 45dd052e67ad17c7a24874a783f41aeab15bc294)
1921a3d4dSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*-
3ccd979bdSMark Fasheh  * vim: noexpandtab sw=8 ts=8 sts=0:
4ccd979bdSMark Fasheh  *
5ccd979bdSMark Fasheh  * extent_map.c
6ccd979bdSMark Fasheh  *
7363041a5SMark Fasheh  * Block/Cluster mapping functions
8ccd979bdSMark Fasheh  *
9ccd979bdSMark Fasheh  * Copyright (C) 2004 Oracle.  All rights reserved.
10ccd979bdSMark Fasheh  */
11ccd979bdSMark Fasheh 
12ccd979bdSMark Fasheh #include <linux/fs.h>
13ccd979bdSMark Fasheh #include <linux/init.h>
145a0e3ad6STejun Heo #include <linux/slab.h>
15ccd979bdSMark Fasheh #include <linux/types.h>
1600dc417fSMark Fasheh #include <linux/fiemap.h>
17ccd979bdSMark Fasheh 
18ccd979bdSMark Fasheh #include <cluster/masklog.h>
19ccd979bdSMark Fasheh 
20ccd979bdSMark Fasheh #include "ocfs2.h"
21ccd979bdSMark Fasheh 
22363041a5SMark Fasheh #include "alloc.h"
2300dc417fSMark Fasheh #include "dlmglue.h"
24ccd979bdSMark Fasheh #include "extent_map.h"
25ccd979bdSMark Fasheh #include "inode.h"
26ccd979bdSMark Fasheh #include "super.h"
2786239d59STristan Ye #include "symlink.h"
28ac604d3cSGang He #include "aops.h"
29a716357cSTao Ma #include "ocfs2_trace.h"
30ccd979bdSMark Fasheh 
31ccd979bdSMark Fasheh #include "buffer_head_io.h"
32ccd979bdSMark Fasheh 
33ccd979bdSMark Fasheh /*
3483418978SMark Fasheh  * The extent caching implementation is intentionally trivial.
3583418978SMark Fasheh  *
3683418978SMark Fasheh  * We only cache a small number of extents stored directly on the
3783418978SMark Fasheh  * inode, so linear order operations are acceptable. If we ever want
3883418978SMark Fasheh  * to increase the size of the extent map, then these algorithms must
3983418978SMark Fasheh  * get smarter.
4083418978SMark Fasheh  */
4183418978SMark Fasheh 
4283418978SMark Fasheh void ocfs2_extent_map_init(struct inode *inode)
4383418978SMark Fasheh {
4483418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
4583418978SMark Fasheh 
4683418978SMark Fasheh 	oi->ip_extent_map.em_num_items = 0;
4783418978SMark Fasheh 	INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
4883418978SMark Fasheh }
4983418978SMark Fasheh 
5083418978SMark Fasheh static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
5183418978SMark Fasheh 				      unsigned int cpos,
5283418978SMark Fasheh 				      struct ocfs2_extent_map_item **ret_emi)
5383418978SMark Fasheh {
5483418978SMark Fasheh 	unsigned int range;
5583418978SMark Fasheh 	struct ocfs2_extent_map_item *emi;
5683418978SMark Fasheh 
5783418978SMark Fasheh 	*ret_emi = NULL;
5883418978SMark Fasheh 
5983418978SMark Fasheh 	list_for_each_entry(emi, &em->em_list, ei_list) {
6083418978SMark Fasheh 		range = emi->ei_cpos + emi->ei_clusters;
6183418978SMark Fasheh 
6283418978SMark Fasheh 		if (cpos >= emi->ei_cpos && cpos < range) {
6383418978SMark Fasheh 			list_move(&emi->ei_list, &em->em_list);
6483418978SMark Fasheh 
6583418978SMark Fasheh 			*ret_emi = emi;
6683418978SMark Fasheh 			break;
6783418978SMark Fasheh 		}
6883418978SMark Fasheh 	}
6983418978SMark Fasheh }
7083418978SMark Fasheh 
7183418978SMark Fasheh static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
7283418978SMark Fasheh 				   unsigned int *phys, unsigned int *len,
7383418978SMark Fasheh 				   unsigned int *flags)
7483418978SMark Fasheh {
7583418978SMark Fasheh 	unsigned int coff;
7683418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
7783418978SMark Fasheh 	struct ocfs2_extent_map_item *emi;
7883418978SMark Fasheh 
7983418978SMark Fasheh 	spin_lock(&oi->ip_lock);
8083418978SMark Fasheh 
8183418978SMark Fasheh 	__ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
8283418978SMark Fasheh 	if (emi) {
8383418978SMark Fasheh 		coff = cpos - emi->ei_cpos;
8483418978SMark Fasheh 		*phys = emi->ei_phys + coff;
8583418978SMark Fasheh 		if (len)
8683418978SMark Fasheh 			*len = emi->ei_clusters - coff;
8783418978SMark Fasheh 		if (flags)
8883418978SMark Fasheh 			*flags = emi->ei_flags;
8983418978SMark Fasheh 	}
9083418978SMark Fasheh 
9183418978SMark Fasheh 	spin_unlock(&oi->ip_lock);
9283418978SMark Fasheh 
9383418978SMark Fasheh 	if (emi == NULL)
9483418978SMark Fasheh 		return -ENOENT;
9583418978SMark Fasheh 
9683418978SMark Fasheh 	return 0;
9783418978SMark Fasheh }
9883418978SMark Fasheh 
9983418978SMark Fasheh /*
10083418978SMark Fasheh  * Forget about all clusters equal to or greater than cpos.
10183418978SMark Fasheh  */
10283418978SMark Fasheh void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
10383418978SMark Fasheh {
104800deef3SChristoph Hellwig 	struct ocfs2_extent_map_item *emi, *n;
10583418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
10683418978SMark Fasheh 	struct ocfs2_extent_map *em = &oi->ip_extent_map;
10783418978SMark Fasheh 	LIST_HEAD(tmp_list);
10883418978SMark Fasheh 	unsigned int range;
10983418978SMark Fasheh 
11083418978SMark Fasheh 	spin_lock(&oi->ip_lock);
111800deef3SChristoph Hellwig 	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
11283418978SMark Fasheh 		if (emi->ei_cpos >= cpos) {
11383418978SMark Fasheh 			/* Full truncate of this record. */
11483418978SMark Fasheh 			list_move(&emi->ei_list, &tmp_list);
11583418978SMark Fasheh 			BUG_ON(em->em_num_items == 0);
11683418978SMark Fasheh 			em->em_num_items--;
11783418978SMark Fasheh 			continue;
11883418978SMark Fasheh 		}
11983418978SMark Fasheh 
12083418978SMark Fasheh 		range = emi->ei_cpos + emi->ei_clusters;
12183418978SMark Fasheh 		if (range > cpos) {
12283418978SMark Fasheh 			/* Partial truncate */
12383418978SMark Fasheh 			emi->ei_clusters = cpos - emi->ei_cpos;
12483418978SMark Fasheh 		}
12583418978SMark Fasheh 	}
12683418978SMark Fasheh 	spin_unlock(&oi->ip_lock);
12783418978SMark Fasheh 
128800deef3SChristoph Hellwig 	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
12983418978SMark Fasheh 		list_del(&emi->ei_list);
13083418978SMark Fasheh 		kfree(emi);
13183418978SMark Fasheh 	}
13283418978SMark Fasheh }
13383418978SMark Fasheh 
13483418978SMark Fasheh /*
13583418978SMark Fasheh  * Is any part of emi2 contained within emi1
13683418978SMark Fasheh  */
13783418978SMark Fasheh static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
13883418978SMark Fasheh 				 struct ocfs2_extent_map_item *emi2)
13983418978SMark Fasheh {
14083418978SMark Fasheh 	unsigned int range1, range2;
14183418978SMark Fasheh 
14283418978SMark Fasheh 	/*
14383418978SMark Fasheh 	 * Check if logical start of emi2 is inside emi1
14483418978SMark Fasheh 	 */
14583418978SMark Fasheh 	range1 = emi1->ei_cpos + emi1->ei_clusters;
14683418978SMark Fasheh 	if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
14783418978SMark Fasheh 		return 1;
14883418978SMark Fasheh 
14983418978SMark Fasheh 	/*
15083418978SMark Fasheh 	 * Check if logical end of emi2 is inside emi1
15183418978SMark Fasheh 	 */
15283418978SMark Fasheh 	range2 = emi2->ei_cpos + emi2->ei_clusters;
15383418978SMark Fasheh 	if (range2 > emi1->ei_cpos && range2 <= range1)
15483418978SMark Fasheh 		return 1;
15583418978SMark Fasheh 
15683418978SMark Fasheh 	return 0;
15783418978SMark Fasheh }
15883418978SMark Fasheh 
15983418978SMark Fasheh static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
16083418978SMark Fasheh 				  struct ocfs2_extent_map_item *src)
16183418978SMark Fasheh {
16283418978SMark Fasheh 	dest->ei_cpos = src->ei_cpos;
16383418978SMark Fasheh 	dest->ei_phys = src->ei_phys;
16483418978SMark Fasheh 	dest->ei_clusters = src->ei_clusters;
16583418978SMark Fasheh 	dest->ei_flags = src->ei_flags;
16683418978SMark Fasheh }
16783418978SMark Fasheh 
16883418978SMark Fasheh /*
16983418978SMark Fasheh  * Try to merge emi with ins. Returns 1 if merge succeeds, zero
17083418978SMark Fasheh  * otherwise.
17183418978SMark Fasheh  */
17283418978SMark Fasheh static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
17383418978SMark Fasheh 					 struct ocfs2_extent_map_item *ins)
17483418978SMark Fasheh {
17583418978SMark Fasheh 	/*
17683418978SMark Fasheh 	 * Handle contiguousness
17783418978SMark Fasheh 	 */
17883418978SMark Fasheh 	if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
17983418978SMark Fasheh 	    ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
18083418978SMark Fasheh 	    ins->ei_flags == emi->ei_flags) {
18183418978SMark Fasheh 		emi->ei_clusters += ins->ei_clusters;
18283418978SMark Fasheh 		return 1;
18383418978SMark Fasheh 	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
184bd6b0bf8SRoel Kluin 		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
18583418978SMark Fasheh 		   ins->ei_flags == emi->ei_flags) {
18683418978SMark Fasheh 		emi->ei_phys = ins->ei_phys;
18783418978SMark Fasheh 		emi->ei_cpos = ins->ei_cpos;
18883418978SMark Fasheh 		emi->ei_clusters += ins->ei_clusters;
18983418978SMark Fasheh 		return 1;
19083418978SMark Fasheh 	}
19183418978SMark Fasheh 
19283418978SMark Fasheh 	/*
19383418978SMark Fasheh 	 * Overlapping extents - this shouldn't happen unless we've
19483418978SMark Fasheh 	 * split an extent to change it's flags. That is exceedingly
19583418978SMark Fasheh 	 * rare, so there's no sense in trying to optimize it yet.
19683418978SMark Fasheh 	 */
19783418978SMark Fasheh 	if (ocfs2_ei_is_contained(emi, ins) ||
19883418978SMark Fasheh 	    ocfs2_ei_is_contained(ins, emi)) {
19983418978SMark Fasheh 		ocfs2_copy_emi_fields(emi, ins);
20083418978SMark Fasheh 		return 1;
20183418978SMark Fasheh 	}
20283418978SMark Fasheh 
20383418978SMark Fasheh 	/* No merge was possible. */
20483418978SMark Fasheh 	return 0;
20583418978SMark Fasheh }
20683418978SMark Fasheh 
20783418978SMark Fasheh /*
20883418978SMark Fasheh  * In order to reduce complexity on the caller, this insert function
20983418978SMark Fasheh  * is intentionally liberal in what it will accept.
21083418978SMark Fasheh  *
21183418978SMark Fasheh  * The only rule is that the truncate call *must* be used whenever
21283418978SMark Fasheh  * records have been deleted. This avoids inserting overlapping
21383418978SMark Fasheh  * records with different physical mappings.
21483418978SMark Fasheh  */
21583418978SMark Fasheh void ocfs2_extent_map_insert_rec(struct inode *inode,
21683418978SMark Fasheh 				 struct ocfs2_extent_rec *rec)
21783418978SMark Fasheh {
21883418978SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
21983418978SMark Fasheh 	struct ocfs2_extent_map *em = &oi->ip_extent_map;
22083418978SMark Fasheh 	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
22183418978SMark Fasheh 	struct ocfs2_extent_map_item ins;
22283418978SMark Fasheh 
22383418978SMark Fasheh 	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
22483418978SMark Fasheh 	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
22583418978SMark Fasheh 					       le64_to_cpu(rec->e_blkno));
22683418978SMark Fasheh 	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
22783418978SMark Fasheh 	ins.ei_flags = rec->e_flags;
22883418978SMark Fasheh 
22983418978SMark Fasheh search:
23083418978SMark Fasheh 	spin_lock(&oi->ip_lock);
23183418978SMark Fasheh 
23283418978SMark Fasheh 	list_for_each_entry(emi, &em->em_list, ei_list) {
23383418978SMark Fasheh 		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
23483418978SMark Fasheh 			list_move(&emi->ei_list, &em->em_list);
23583418978SMark Fasheh 			spin_unlock(&oi->ip_lock);
23683418978SMark Fasheh 			goto out;
23783418978SMark Fasheh 		}
23883418978SMark Fasheh 	}
23983418978SMark Fasheh 
24083418978SMark Fasheh 	/*
24183418978SMark Fasheh 	 * No item could be merged.
24283418978SMark Fasheh 	 *
24383418978SMark Fasheh 	 * Either allocate and add a new item, or overwrite the last recently
24483418978SMark Fasheh 	 * inserted.
24583418978SMark Fasheh 	 */
24683418978SMark Fasheh 
24783418978SMark Fasheh 	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
24883418978SMark Fasheh 		if (new_emi == NULL) {
24983418978SMark Fasheh 			spin_unlock(&oi->ip_lock);
25083418978SMark Fasheh 
25183418978SMark Fasheh 			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
25283418978SMark Fasheh 			if (new_emi == NULL)
25383418978SMark Fasheh 				goto out;
25483418978SMark Fasheh 
25583418978SMark Fasheh 			goto search;
25683418978SMark Fasheh 		}
25783418978SMark Fasheh 
25883418978SMark Fasheh 		ocfs2_copy_emi_fields(new_emi, &ins);
25983418978SMark Fasheh 		list_add(&new_emi->ei_list, &em->em_list);
26083418978SMark Fasheh 		em->em_num_items++;
26183418978SMark Fasheh 		new_emi = NULL;
26283418978SMark Fasheh 	} else {
26383418978SMark Fasheh 		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
26483418978SMark Fasheh 		emi = list_entry(em->em_list.prev,
26583418978SMark Fasheh 				 struct ocfs2_extent_map_item, ei_list);
26683418978SMark Fasheh 		list_move(&emi->ei_list, &em->em_list);
26783418978SMark Fasheh 		ocfs2_copy_emi_fields(emi, &ins);
26883418978SMark Fasheh 	}
26983418978SMark Fasheh 
27083418978SMark Fasheh 	spin_unlock(&oi->ip_lock);
27183418978SMark Fasheh 
27283418978SMark Fasheh out:
27383418978SMark Fasheh 	kfree(new_emi);
27483418978SMark Fasheh }
27583418978SMark Fasheh 
27600dc417fSMark Fasheh static int ocfs2_last_eb_is_empty(struct inode *inode,
27700dc417fSMark Fasheh 				  struct ocfs2_dinode *di)
27800dc417fSMark Fasheh {
27900dc417fSMark Fasheh 	int ret, next_free;
28000dc417fSMark Fasheh 	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
28100dc417fSMark Fasheh 	struct buffer_head *eb_bh = NULL;
28200dc417fSMark Fasheh 	struct ocfs2_extent_block *eb;
28300dc417fSMark Fasheh 	struct ocfs2_extent_list *el;
28400dc417fSMark Fasheh 
2853d03a305SJoel Becker 	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
28600dc417fSMark Fasheh 	if (ret) {
28700dc417fSMark Fasheh 		mlog_errno(ret);
28800dc417fSMark Fasheh 		goto out;
28900dc417fSMark Fasheh 	}
29000dc417fSMark Fasheh 
29100dc417fSMark Fasheh 	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
29200dc417fSMark Fasheh 	el = &eb->h_list;
29300dc417fSMark Fasheh 
29400dc417fSMark Fasheh 	if (el->l_tree_depth) {
29500dc417fSMark Fasheh 		ocfs2_error(inode->i_sb,
2967ecef14aSJoe Perches 			    "Inode %lu has non zero tree depth in leaf block %llu\n",
2977ecef14aSJoe Perches 			    inode->i_ino,
29800dc417fSMark Fasheh 			    (unsigned long long)eb_bh->b_blocknr);
29900dc417fSMark Fasheh 		ret = -EROFS;
30000dc417fSMark Fasheh 		goto out;
30100dc417fSMark Fasheh 	}
30200dc417fSMark Fasheh 
30300dc417fSMark Fasheh 	next_free = le16_to_cpu(el->l_next_free_rec);
30400dc417fSMark Fasheh 
30500dc417fSMark Fasheh 	if (next_free == 0 ||
30600dc417fSMark Fasheh 	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
30700dc417fSMark Fasheh 		ret = 1;
30800dc417fSMark Fasheh 
30900dc417fSMark Fasheh out:
31000dc417fSMark Fasheh 	brelse(eb_bh);
31100dc417fSMark Fasheh 	return ret;
31200dc417fSMark Fasheh }
31300dc417fSMark Fasheh 
31483418978SMark Fasheh /*
3154f902c37SMark Fasheh  * Return the 1st index within el which contains an extent start
3164f902c37SMark Fasheh  * larger than v_cluster.
3174f902c37SMark Fasheh  */
3184f902c37SMark Fasheh static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
3194f902c37SMark Fasheh 				       u32 v_cluster)
3204f902c37SMark Fasheh {
3214f902c37SMark Fasheh 	int i;
3224f902c37SMark Fasheh 	struct ocfs2_extent_rec *rec;
3234f902c37SMark Fasheh 
3244f902c37SMark Fasheh 	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
3254f902c37SMark Fasheh 		rec = &el->l_recs[i];
3264f902c37SMark Fasheh 
3274f902c37SMark Fasheh 		if (v_cluster < le32_to_cpu(rec->e_cpos))
3284f902c37SMark Fasheh 			break;
3294f902c37SMark Fasheh 	}
3304f902c37SMark Fasheh 
3314f902c37SMark Fasheh 	return i;
3324f902c37SMark Fasheh }
3334f902c37SMark Fasheh 
3344f902c37SMark Fasheh /*
3354f902c37SMark Fasheh  * Figure out the size of a hole which starts at v_cluster within the given
3364f902c37SMark Fasheh  * extent list.
3374f902c37SMark Fasheh  *
3384f902c37SMark Fasheh  * If there is no more allocation past v_cluster, we return the maximum
3394f902c37SMark Fasheh  * cluster size minus v_cluster.
3404f902c37SMark Fasheh  *
3414f902c37SMark Fasheh  * If we have in-inode extents, then el points to the dinode list and
3424f902c37SMark Fasheh  * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
3434f902c37SMark Fasheh  * containing el.
3444f902c37SMark Fasheh  */
345e73a819dSTao Ma int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
3464f902c37SMark Fasheh 			       struct ocfs2_extent_list *el,
3474f902c37SMark Fasheh 			       struct buffer_head *eb_bh,
3484f902c37SMark Fasheh 			       u32 v_cluster,
3494f902c37SMark Fasheh 			       u32 *num_clusters)
3504f902c37SMark Fasheh {
3514f902c37SMark Fasheh 	int ret, i;
3524f902c37SMark Fasheh 	struct buffer_head *next_eb_bh = NULL;
3534f902c37SMark Fasheh 	struct ocfs2_extent_block *eb, *next_eb;
3544f902c37SMark Fasheh 
3554f902c37SMark Fasheh 	i = ocfs2_search_for_hole_index(el, v_cluster);
3564f902c37SMark Fasheh 
3574f902c37SMark Fasheh 	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
3584f902c37SMark Fasheh 		eb = (struct ocfs2_extent_block *)eb_bh->b_data;
3594f902c37SMark Fasheh 
3604f902c37SMark Fasheh 		/*
3614f902c37SMark Fasheh 		 * Check the next leaf for any extents.
3624f902c37SMark Fasheh 		 */
3634f902c37SMark Fasheh 
3644f902c37SMark Fasheh 		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
3654f902c37SMark Fasheh 			goto no_more_extents;
3664f902c37SMark Fasheh 
367e73a819dSTao Ma 		ret = ocfs2_read_extent_block(ci,
3684f902c37SMark Fasheh 					      le64_to_cpu(eb->h_next_leaf_blk),
3690fcaa56aSJoel Becker 					      &next_eb_bh);
3704f902c37SMark Fasheh 		if (ret) {
3714f902c37SMark Fasheh 			mlog_errno(ret);
3724f902c37SMark Fasheh 			goto out;
3734f902c37SMark Fasheh 		}
3745e96581aSJoel Becker 
3754f902c37SMark Fasheh 		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
3764f902c37SMark Fasheh 		el = &next_eb->h_list;
3774f902c37SMark Fasheh 		i = ocfs2_search_for_hole_index(el, v_cluster);
3784f902c37SMark Fasheh 	}
3794f902c37SMark Fasheh 
3804f902c37SMark Fasheh no_more_extents:
3814f902c37SMark Fasheh 	if (i == le16_to_cpu(el->l_next_free_rec)) {
3824f902c37SMark Fasheh 		/*
3834f902c37SMark Fasheh 		 * We're at the end of our existing allocation. Just
3844f902c37SMark Fasheh 		 * return the maximum number of clusters we could
3854f902c37SMark Fasheh 		 * possibly allocate.
3864f902c37SMark Fasheh 		 */
3874f902c37SMark Fasheh 		*num_clusters = UINT_MAX - v_cluster;
3884f902c37SMark Fasheh 	} else {
3894f902c37SMark Fasheh 		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
3904f902c37SMark Fasheh 	}
3914f902c37SMark Fasheh 
3924f902c37SMark Fasheh 	ret = 0;
3934f902c37SMark Fasheh out:
3944f902c37SMark Fasheh 	brelse(next_eb_bh);
3954f902c37SMark Fasheh 	return ret;
3964f902c37SMark Fasheh }
3974f902c37SMark Fasheh 
39800dc417fSMark Fasheh static int ocfs2_get_clusters_nocache(struct inode *inode,
39900dc417fSMark Fasheh 				      struct buffer_head *di_bh,
40000dc417fSMark Fasheh 				      u32 v_cluster, unsigned int *hole_len,
40100dc417fSMark Fasheh 				      struct ocfs2_extent_rec *ret_rec,
40200dc417fSMark Fasheh 				      unsigned int *is_last)
403ccd979bdSMark Fasheh {
40400dc417fSMark Fasheh 	int i, ret, tree_height, len;
405ccd979bdSMark Fasheh 	struct ocfs2_dinode *di;
40600dc417fSMark Fasheh 	struct ocfs2_extent_block *uninitialized_var(eb);
407ccd979bdSMark Fasheh 	struct ocfs2_extent_list *el;
408ccd979bdSMark Fasheh 	struct ocfs2_extent_rec *rec;
40900dc417fSMark Fasheh 	struct buffer_head *eb_bh = NULL;
410ccd979bdSMark Fasheh 
41100dc417fSMark Fasheh 	memset(ret_rec, 0, sizeof(*ret_rec));
41200dc417fSMark Fasheh 	if (is_last)
41300dc417fSMark Fasheh 		*is_last = 0;
414363041a5SMark Fasheh 
415363041a5SMark Fasheh 	di = (struct ocfs2_dinode *) di_bh->b_data;
416363041a5SMark Fasheh 	el = &di->id2.i_list;
41700dc417fSMark Fasheh 	tree_height = le16_to_cpu(el->l_tree_depth);
418363041a5SMark Fasheh 
41900dc417fSMark Fasheh 	if (tree_height > 0) {
420facdb77fSJoel Becker 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
421facdb77fSJoel Becker 				      &eb_bh);
422363041a5SMark Fasheh 		if (ret) {
423363041a5SMark Fasheh 			mlog_errno(ret);
424363041a5SMark Fasheh 			goto out;
425363041a5SMark Fasheh 		}
426363041a5SMark Fasheh 
427363041a5SMark Fasheh 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
428363041a5SMark Fasheh 		el = &eb->h_list;
429e48edee2SMark Fasheh 
430e48edee2SMark Fasheh 		if (el->l_tree_depth) {
431e48edee2SMark Fasheh 			ocfs2_error(inode->i_sb,
4327ecef14aSJoe Perches 				    "Inode %lu has non zero tree depth in leaf block %llu\n",
4337ecef14aSJoe Perches 				    inode->i_ino,
434e48edee2SMark Fasheh 				    (unsigned long long)eb_bh->b_blocknr);
435e48edee2SMark Fasheh 			ret = -EROFS;
436e48edee2SMark Fasheh 			goto out;
437e48edee2SMark Fasheh 		}
438363041a5SMark Fasheh 	}
439363041a5SMark Fasheh 
440363041a5SMark Fasheh 	i = ocfs2_search_extent_list(el, v_cluster);
441363041a5SMark Fasheh 	if (i == -1) {
442363041a5SMark Fasheh 		/*
44300dc417fSMark Fasheh 		 * Holes can be larger than the maximum size of an
4443ad2f3fbSDaniel Mack 		 * extent, so we return their lengths in a separate
44500dc417fSMark Fasheh 		 * field.
446363041a5SMark Fasheh 		 */
44700dc417fSMark Fasheh 		if (hole_len) {
448e73a819dSTao Ma 			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
449e73a819dSTao Ma 							 el, eb_bh,
45000dc417fSMark Fasheh 							 v_cluster, &len);
4514f902c37SMark Fasheh 			if (ret) {
4524f902c37SMark Fasheh 				mlog_errno(ret);
4534f902c37SMark Fasheh 				goto out;
4544f902c37SMark Fasheh 			}
45500dc417fSMark Fasheh 
45600dc417fSMark Fasheh 			*hole_len = len;
4574f902c37SMark Fasheh 		}
45800dc417fSMark Fasheh 		goto out_hole;
45900dc417fSMark Fasheh 	}
46000dc417fSMark Fasheh 
461363041a5SMark Fasheh 	rec = &el->l_recs[i];
462363041a5SMark Fasheh 
463363041a5SMark Fasheh 	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
464363041a5SMark Fasheh 
465363041a5SMark Fasheh 	if (!rec->e_blkno) {
4667ecef14aSJoe Perches 		ocfs2_error(inode->i_sb,
4677ecef14aSJoe Perches 			    "Inode %lu has bad extent record (%u, %u, 0)\n",
4687ecef14aSJoe Perches 			    inode->i_ino,
469363041a5SMark Fasheh 			    le32_to_cpu(rec->e_cpos),
470e48edee2SMark Fasheh 			    ocfs2_rec_clusters(el, rec));
471363041a5SMark Fasheh 		ret = -EROFS;
472363041a5SMark Fasheh 		goto out;
473363041a5SMark Fasheh 	}
474363041a5SMark Fasheh 
47500dc417fSMark Fasheh 	*ret_rec = *rec;
476363041a5SMark Fasheh 
47700dc417fSMark Fasheh 	/*
47800dc417fSMark Fasheh 	 * Checking for last extent is potentially expensive - we
47900dc417fSMark Fasheh 	 * might have to look at the next leaf over to see if it's
48000dc417fSMark Fasheh 	 * empty.
48100dc417fSMark Fasheh 	 *
48200dc417fSMark Fasheh 	 * The first two checks are to see whether the caller even
48300dc417fSMark Fasheh 	 * cares for this information, and if the extent is at least
48400dc417fSMark Fasheh 	 * the last in it's list.
48500dc417fSMark Fasheh 	 *
48600dc417fSMark Fasheh 	 * If those hold true, then the extent is last if any of the
48700dc417fSMark Fasheh 	 * additional conditions hold true:
48800dc417fSMark Fasheh 	 *  - Extent list is in-inode
48900dc417fSMark Fasheh 	 *  - Extent list is right-most
49000dc417fSMark Fasheh 	 *  - Extent list is 2nd to rightmost, with empty right-most
49100dc417fSMark Fasheh 	 */
49200dc417fSMark Fasheh 	if (is_last) {
49300dc417fSMark Fasheh 		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
49400dc417fSMark Fasheh 			if (tree_height == 0)
49500dc417fSMark Fasheh 				*is_last = 1;
49600dc417fSMark Fasheh 			else if (eb->h_blkno == di->i_last_eb_blk)
49700dc417fSMark Fasheh 				*is_last = 1;
49800dc417fSMark Fasheh 			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
49900dc417fSMark Fasheh 				ret = ocfs2_last_eb_is_empty(inode, di);
50000dc417fSMark Fasheh 				if (ret < 0) {
50100dc417fSMark Fasheh 					mlog_errno(ret);
50200dc417fSMark Fasheh 					goto out;
50300dc417fSMark Fasheh 				}
50400dc417fSMark Fasheh 				if (ret == 1)
50500dc417fSMark Fasheh 					*is_last = 1;
50600dc417fSMark Fasheh 			}
50700dc417fSMark Fasheh 		}
50800dc417fSMark Fasheh 	}
50900dc417fSMark Fasheh 
51000dc417fSMark Fasheh out_hole:
51100dc417fSMark Fasheh 	ret = 0;
51200dc417fSMark Fasheh out:
51300dc417fSMark Fasheh 	brelse(eb_bh);
51400dc417fSMark Fasheh 	return ret;
51500dc417fSMark Fasheh }
51600dc417fSMark Fasheh 
51700dc417fSMark Fasheh static void ocfs2_relative_extent_offsets(struct super_block *sb,
51800dc417fSMark Fasheh 					  u32 v_cluster,
51900dc417fSMark Fasheh 					  struct ocfs2_extent_rec *rec,
52000dc417fSMark Fasheh 					  u32 *p_cluster, u32 *num_clusters)
52100dc417fSMark Fasheh 
52200dc417fSMark Fasheh {
52300dc417fSMark Fasheh 	u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
52400dc417fSMark Fasheh 
52500dc417fSMark Fasheh 	*p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
526363041a5SMark Fasheh 	*p_cluster = *p_cluster + coff;
527363041a5SMark Fasheh 
528363041a5SMark Fasheh 	if (num_clusters)
52900dc417fSMark Fasheh 		*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
53000dc417fSMark Fasheh }
53149cb8d2dSMark Fasheh 
532f56654c4STao Ma int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
533f56654c4STao Ma 			     u32 *p_cluster, u32 *num_clusters,
5341061f9c1STao Ma 			     struct ocfs2_extent_list *el,
5351061f9c1STao Ma 			     unsigned int *extent_flags)
536f56654c4STao Ma {
537f56654c4STao Ma 	int ret = 0, i;
538f56654c4STao Ma 	struct buffer_head *eb_bh = NULL;
539f56654c4STao Ma 	struct ocfs2_extent_block *eb;
540f56654c4STao Ma 	struct ocfs2_extent_rec *rec;
541f56654c4STao Ma 	u32 coff;
542f56654c4STao Ma 
543f56654c4STao Ma 	if (el->l_tree_depth) {
544facdb77fSJoel Becker 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
545facdb77fSJoel Becker 				      &eb_bh);
546f56654c4STao Ma 		if (ret) {
547f56654c4STao Ma 			mlog_errno(ret);
548f56654c4STao Ma 			goto out;
549f56654c4STao Ma 		}
550f56654c4STao Ma 
551f56654c4STao Ma 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
552f56654c4STao Ma 		el = &eb->h_list;
553f56654c4STao Ma 
554f56654c4STao Ma 		if (el->l_tree_depth) {
555f56654c4STao Ma 			ocfs2_error(inode->i_sb,
5567ecef14aSJoe Perches 				    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
5577ecef14aSJoe Perches 				    inode->i_ino,
558f56654c4STao Ma 				    (unsigned long long)eb_bh->b_blocknr);
559f56654c4STao Ma 			ret = -EROFS;
560f56654c4STao Ma 			goto out;
561f56654c4STao Ma 		}
562f56654c4STao Ma 	}
563f56654c4STao Ma 
564f56654c4STao Ma 	i = ocfs2_search_extent_list(el, v_cluster);
565f56654c4STao Ma 	if (i == -1) {
566f56654c4STao Ma 		ret = -EROFS;
567f56654c4STao Ma 		mlog_errno(ret);
568f56654c4STao Ma 		goto out;
569f56654c4STao Ma 	} else {
570f56654c4STao Ma 		rec = &el->l_recs[i];
571f56654c4STao Ma 		BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
572f56654c4STao Ma 
573f56654c4STao Ma 		if (!rec->e_blkno) {
5747ecef14aSJoe Perches 			ocfs2_error(inode->i_sb,
5757ecef14aSJoe Perches 				    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
5767ecef14aSJoe Perches 				    inode->i_ino,
577f56654c4STao Ma 				    le32_to_cpu(rec->e_cpos),
578f56654c4STao Ma 				    ocfs2_rec_clusters(el, rec));
579f56654c4STao Ma 			ret = -EROFS;
580f56654c4STao Ma 			goto out;
581f56654c4STao Ma 		}
582f56654c4STao Ma 		coff = v_cluster - le32_to_cpu(rec->e_cpos);
583f56654c4STao Ma 		*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
584f56654c4STao Ma 						    le64_to_cpu(rec->e_blkno));
585f56654c4STao Ma 		*p_cluster = *p_cluster + coff;
586f56654c4STao Ma 		if (num_clusters)
587f56654c4STao Ma 			*num_clusters = ocfs2_rec_clusters(el, rec) - coff;
5881061f9c1STao Ma 
5891061f9c1STao Ma 		if (extent_flags)
5901061f9c1STao Ma 			*extent_flags = rec->e_flags;
591f56654c4STao Ma 	}
592f56654c4STao Ma out:
593f56654c4STao Ma 	brelse(eb_bh);
594f56654c4STao Ma 	return ret;
595f56654c4STao Ma }
596f56654c4STao Ma 
59700dc417fSMark Fasheh int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
59800dc417fSMark Fasheh 		       u32 *p_cluster, u32 *num_clusters,
59900dc417fSMark Fasheh 		       unsigned int *extent_flags)
60000dc417fSMark Fasheh {
60100dc417fSMark Fasheh 	int ret;
60200dc417fSMark Fasheh 	unsigned int uninitialized_var(hole_len), flags = 0;
60300dc417fSMark Fasheh 	struct buffer_head *di_bh = NULL;
60400dc417fSMark Fasheh 	struct ocfs2_extent_rec rec;
60583418978SMark Fasheh 
60600dc417fSMark Fasheh 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
60700dc417fSMark Fasheh 		ret = -ERANGE;
60800dc417fSMark Fasheh 		mlog_errno(ret);
60900dc417fSMark Fasheh 		goto out;
61000dc417fSMark Fasheh 	}
61100dc417fSMark Fasheh 
61200dc417fSMark Fasheh 	ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
61300dc417fSMark Fasheh 				      num_clusters, extent_flags);
61400dc417fSMark Fasheh 	if (ret == 0)
61500dc417fSMark Fasheh 		goto out;
61600dc417fSMark Fasheh 
617b657c95cSJoel Becker 	ret = ocfs2_read_inode_block(inode, &di_bh);
61800dc417fSMark Fasheh 	if (ret) {
61900dc417fSMark Fasheh 		mlog_errno(ret);
62000dc417fSMark Fasheh 		goto out;
62100dc417fSMark Fasheh 	}
62200dc417fSMark Fasheh 
62300dc417fSMark Fasheh 	ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
62400dc417fSMark Fasheh 					 &rec, NULL);
62500dc417fSMark Fasheh 	if (ret) {
62600dc417fSMark Fasheh 		mlog_errno(ret);
62700dc417fSMark Fasheh 		goto out;
62800dc417fSMark Fasheh 	}
62900dc417fSMark Fasheh 
63000dc417fSMark Fasheh 	if (rec.e_blkno == 0ULL) {
63100dc417fSMark Fasheh 		/*
63200dc417fSMark Fasheh 		 * A hole was found. Return some canned values that
63300dc417fSMark Fasheh 		 * callers can key on. If asked for, num_clusters will
63400dc417fSMark Fasheh 		 * be populated with the size of the hole.
63500dc417fSMark Fasheh 		 */
63600dc417fSMark Fasheh 		*p_cluster = 0;
63700dc417fSMark Fasheh 		if (num_clusters) {
63800dc417fSMark Fasheh 			*num_clusters = hole_len;
63900dc417fSMark Fasheh 		}
64000dc417fSMark Fasheh 	} else {
64100dc417fSMark Fasheh 		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
64200dc417fSMark Fasheh 					      p_cluster, num_clusters);
64300dc417fSMark Fasheh 		flags = rec.e_flags;
64400dc417fSMark Fasheh 
64500dc417fSMark Fasheh 		ocfs2_extent_map_insert_rec(inode, &rec);
646363041a5SMark Fasheh 	}
647363041a5SMark Fasheh 
64849cb8d2dSMark Fasheh 	if (extent_flags)
64949cb8d2dSMark Fasheh 		*extent_flags = flags;
65049cb8d2dSMark Fasheh 
651363041a5SMark Fasheh out:
652363041a5SMark Fasheh 	brelse(di_bh);
653363041a5SMark Fasheh 	return ret;
654363041a5SMark Fasheh }
655363041a5SMark Fasheh 
656363041a5SMark Fasheh /*
657363041a5SMark Fasheh  * This expects alloc_sem to be held. The allocation cannot change at
658363041a5SMark Fasheh  * all while the map is in the process of being updated.
659363041a5SMark Fasheh  */
660363041a5SMark Fasheh int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
6614f902c37SMark Fasheh 				u64 *ret_count, unsigned int *extent_flags)
662363041a5SMark Fasheh {
663363041a5SMark Fasheh 	int ret;
664363041a5SMark Fasheh 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
665363041a5SMark Fasheh 	u32 cpos, num_clusters, p_cluster;
666363041a5SMark Fasheh 	u64 boff = 0;
667ccd979bdSMark Fasheh 
668ccd979bdSMark Fasheh 	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
669ccd979bdSMark Fasheh 
67049cb8d2dSMark Fasheh 	ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
67149cb8d2dSMark Fasheh 				 extent_flags);
672ccd979bdSMark Fasheh 	if (ret) {
673ccd979bdSMark Fasheh 		mlog_errno(ret);
674363041a5SMark Fasheh 		goto out;
675ccd979bdSMark Fasheh 	}
676ccd979bdSMark Fasheh 
677363041a5SMark Fasheh 	/*
678363041a5SMark Fasheh 	 * p_cluster == 0 indicates a hole.
679363041a5SMark Fasheh 	 */
680363041a5SMark Fasheh 	if (p_cluster) {
681363041a5SMark Fasheh 		boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
682ccd979bdSMark Fasheh 		boff += (v_blkno & (u64)(bpc - 1));
683363041a5SMark Fasheh 	}
684363041a5SMark Fasheh 
685363041a5SMark Fasheh 	*p_blkno = boff;
686ccd979bdSMark Fasheh 
687ccd979bdSMark Fasheh 	if (ret_count) {
688363041a5SMark Fasheh 		*ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
689363041a5SMark Fasheh 		*ret_count -= v_blkno & (u64)(bpc - 1);
690ccd979bdSMark Fasheh 	}
691ccd979bdSMark Fasheh 
692363041a5SMark Fasheh out:
693363041a5SMark Fasheh 	return ret;
694ccd979bdSMark Fasheh }
69500dc417fSMark Fasheh 
69686239d59STristan Ye /*
69786239d59STristan Ye  * The ocfs2_fiemap_inline() may be a little bit misleading, since
69886239d59STristan Ye  * it not only handles the fiemap for inlined files, but also deals
69986239d59STristan Ye  * with the fast symlink, cause they have no difference for extent
70086239d59STristan Ye  * mapping per se.
70186239d59STristan Ye  */
70200dc417fSMark Fasheh static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
70300dc417fSMark Fasheh 			       struct fiemap_extent_info *fieinfo,
70400dc417fSMark Fasheh 			       u64 map_start)
70500dc417fSMark Fasheh {
70600dc417fSMark Fasheh 	int ret;
70700dc417fSMark Fasheh 	unsigned int id_count;
70800dc417fSMark Fasheh 	struct ocfs2_dinode *di;
70900dc417fSMark Fasheh 	u64 phys;
71000dc417fSMark Fasheh 	u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
71100dc417fSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
71200dc417fSMark Fasheh 
71300dc417fSMark Fasheh 	di = (struct ocfs2_dinode *)di_bh->b_data;
71486239d59STristan Ye 	if (ocfs2_inode_is_fast_symlink(inode))
71586239d59STristan Ye 		id_count = ocfs2_fast_symlink_chars(inode->i_sb);
71686239d59STristan Ye 	else
71700dc417fSMark Fasheh 		id_count = le16_to_cpu(di->id2.i_data.id_count);
71800dc417fSMark Fasheh 
71900dc417fSMark Fasheh 	if (map_start < id_count) {
72000dc417fSMark Fasheh 		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
72186239d59STristan Ye 		if (ocfs2_inode_is_fast_symlink(inode))
72286239d59STristan Ye 			phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
72386239d59STristan Ye 		else
72486239d59STristan Ye 			phys += offsetof(struct ocfs2_dinode,
72586239d59STristan Ye 					 id2.i_data.id_data);
72600dc417fSMark Fasheh 
72700dc417fSMark Fasheh 		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
72800dc417fSMark Fasheh 					      flags);
72900dc417fSMark Fasheh 		if (ret < 0)
73000dc417fSMark Fasheh 			return ret;
73100dc417fSMark Fasheh 	}
73200dc417fSMark Fasheh 
73300dc417fSMark Fasheh 	return 0;
73400dc417fSMark Fasheh }
73500dc417fSMark Fasheh 
73600dc417fSMark Fasheh int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
73700dc417fSMark Fasheh 		 u64 map_start, u64 map_len)
73800dc417fSMark Fasheh {
73900dc417fSMark Fasheh 	int ret, is_last;
74000dc417fSMark Fasheh 	u32 mapping_end, cpos;
74100dc417fSMark Fasheh 	unsigned int hole_size;
74200dc417fSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
74300dc417fSMark Fasheh 	u64 len_bytes, phys_bytes, virt_bytes;
74400dc417fSMark Fasheh 	struct buffer_head *di_bh = NULL;
74500dc417fSMark Fasheh 	struct ocfs2_extent_rec rec;
74600dc417fSMark Fasheh 
747*45dd052eSChristoph Hellwig 	ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0);
74800dc417fSMark Fasheh 	if (ret)
74900dc417fSMark Fasheh 		return ret;
75000dc417fSMark Fasheh 
75100dc417fSMark Fasheh 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
75200dc417fSMark Fasheh 	if (ret) {
75300dc417fSMark Fasheh 		mlog_errno(ret);
75400dc417fSMark Fasheh 		goto out;
75500dc417fSMark Fasheh 	}
75600dc417fSMark Fasheh 
75700dc417fSMark Fasheh 	down_read(&OCFS2_I(inode)->ip_alloc_sem);
75800dc417fSMark Fasheh 
75900dc417fSMark Fasheh 	/*
76086239d59STristan Ye 	 * Handle inline-data and fast symlink separately.
76100dc417fSMark Fasheh 	 */
76286239d59STristan Ye 	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
76386239d59STristan Ye 	    ocfs2_inode_is_fast_symlink(inode)) {
76400dc417fSMark Fasheh 		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
76500dc417fSMark Fasheh 		goto out_unlock;
76600dc417fSMark Fasheh 	}
76700dc417fSMark Fasheh 
76800dc417fSMark Fasheh 	cpos = map_start >> osb->s_clustersize_bits;
76900dc417fSMark Fasheh 	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
77000dc417fSMark Fasheh 					       map_start + map_len);
77100dc417fSMark Fasheh 	is_last = 0;
77200dc417fSMark Fasheh 	while (cpos < mapping_end && !is_last) {
77300dc417fSMark Fasheh 		u32 fe_flags;
77400dc417fSMark Fasheh 
77500dc417fSMark Fasheh 		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
77600dc417fSMark Fasheh 						 &hole_size, &rec, &is_last);
77700dc417fSMark Fasheh 		if (ret) {
77800dc417fSMark Fasheh 			mlog_errno(ret);
779b4ca2b4bSJoseph Qi 			goto out_unlock;
78000dc417fSMark Fasheh 		}
78100dc417fSMark Fasheh 
78200dc417fSMark Fasheh 		if (rec.e_blkno == 0ULL) {
78300dc417fSMark Fasheh 			cpos += hole_size;
78400dc417fSMark Fasheh 			continue;
78500dc417fSMark Fasheh 		}
78600dc417fSMark Fasheh 
78700dc417fSMark Fasheh 		fe_flags = 0;
78800dc417fSMark Fasheh 		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
78900dc417fSMark Fasheh 			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
790faf8b70fSSunil Mushran 		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
791faf8b70fSSunil Mushran 			fe_flags |= FIEMAP_EXTENT_SHARED;
79200dc417fSMark Fasheh 		if (is_last)
79300dc417fSMark Fasheh 			fe_flags |= FIEMAP_EXTENT_LAST;
79400dc417fSMark Fasheh 		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
79500dc417fSMark Fasheh 		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
79600dc417fSMark Fasheh 		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
79700dc417fSMark Fasheh 
79800dc417fSMark Fasheh 		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
79900dc417fSMark Fasheh 					      len_bytes, fe_flags);
80000dc417fSMark Fasheh 		if (ret)
80100dc417fSMark Fasheh 			break;
80200dc417fSMark Fasheh 
80300dc417fSMark Fasheh 		cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
80400dc417fSMark Fasheh 	}
80500dc417fSMark Fasheh 
80600dc417fSMark Fasheh 	if (ret > 0)
80700dc417fSMark Fasheh 		ret = 0;
80800dc417fSMark Fasheh 
80900dc417fSMark Fasheh out_unlock:
81000dc417fSMark Fasheh 	brelse(di_bh);
81100dc417fSMark Fasheh 
81200dc417fSMark Fasheh 	up_read(&OCFS2_I(inode)->ip_alloc_sem);
81300dc417fSMark Fasheh 
81400dc417fSMark Fasheh 	ocfs2_inode_unlock(inode, 0);
81500dc417fSMark Fasheh out:
81600dc417fSMark Fasheh 
81700dc417fSMark Fasheh 	return ret;
81800dc417fSMark Fasheh }
819a8549fb5SJoel Becker 
820ac604d3cSGang He /* Is IO overwriting allocated blocks? */
821ac604d3cSGang He int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
822ac604d3cSGang He 		       u64 map_start, u64 map_len)
823ac604d3cSGang He {
824ac604d3cSGang He 	int ret = 0, is_last;
825ac604d3cSGang He 	u32 mapping_end, cpos;
826ac604d3cSGang He 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
827ac604d3cSGang He 	struct ocfs2_extent_rec rec;
828ac604d3cSGang He 
829ac604d3cSGang He 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
830ac604d3cSGang He 		if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
831ac604d3cSGang He 			return ret;
832ac604d3cSGang He 		else
833ac604d3cSGang He 			return -EAGAIN;
834ac604d3cSGang He 	}
835ac604d3cSGang He 
836ac604d3cSGang He 	cpos = map_start >> osb->s_clustersize_bits;
837ac604d3cSGang He 	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
838ac604d3cSGang He 					       map_start + map_len);
839ac604d3cSGang He 	is_last = 0;
840ac604d3cSGang He 	while (cpos < mapping_end && !is_last) {
841ac604d3cSGang He 		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
842ac604d3cSGang He 						 NULL, &rec, &is_last);
843ac604d3cSGang He 		if (ret) {
844ac604d3cSGang He 			mlog_errno(ret);
845ac604d3cSGang He 			goto out;
846ac604d3cSGang He 		}
847ac604d3cSGang He 
848ac604d3cSGang He 		if (rec.e_blkno == 0ULL)
849ac604d3cSGang He 			break;
850ac604d3cSGang He 
851ac604d3cSGang He 		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
852ac604d3cSGang He 			break;
853ac604d3cSGang He 
854ac604d3cSGang He 		cpos = le32_to_cpu(rec.e_cpos) +
855ac604d3cSGang He 			le16_to_cpu(rec.e_leaf_clusters);
856ac604d3cSGang He 	}
857ac604d3cSGang He 
858ac604d3cSGang He 	if (cpos < mapping_end)
859ac604d3cSGang He 		ret = -EAGAIN;
860ac604d3cSGang He out:
861ac604d3cSGang He 	return ret;
862ac604d3cSGang He }
863ac604d3cSGang He 
864965c8e59SAndrew Morton int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
86593862d5eSSunil Mushran {
86693862d5eSSunil Mushran 	struct inode *inode = file->f_mapping->host;
86793862d5eSSunil Mushran 	int ret;
86893862d5eSSunil Mushran 	unsigned int is_last = 0, is_data = 0;
86993862d5eSSunil Mushran 	u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
87093862d5eSSunil Mushran 	u32 cpos, cend, clen, hole_size;
87193862d5eSSunil Mushran 	u64 extoff, extlen;
87293862d5eSSunil Mushran 	struct buffer_head *di_bh = NULL;
87393862d5eSSunil Mushran 	struct ocfs2_extent_rec rec;
87493862d5eSSunil Mushran 
875965c8e59SAndrew Morton 	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);
87693862d5eSSunil Mushran 
87793862d5eSSunil Mushran 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
87893862d5eSSunil Mushran 	if (ret) {
87993862d5eSSunil Mushran 		mlog_errno(ret);
88093862d5eSSunil Mushran 		goto out;
88193862d5eSSunil Mushran 	}
88293862d5eSSunil Mushran 
88393862d5eSSunil Mushran 	down_read(&OCFS2_I(inode)->ip_alloc_sem);
88493862d5eSSunil Mushran 
885f17c20ddSJunxiao Bi 	if (*offset >= i_size_read(inode)) {
88693862d5eSSunil Mushran 		ret = -ENXIO;
88793862d5eSSunil Mushran 		goto out_unlock;
88893862d5eSSunil Mushran 	}
88993862d5eSSunil Mushran 
89093862d5eSSunil Mushran 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
891965c8e59SAndrew Morton 		if (whence == SEEK_HOLE)
892f17c20ddSJunxiao Bi 			*offset = i_size_read(inode);
89393862d5eSSunil Mushran 		goto out_unlock;
89493862d5eSSunil Mushran 	}
89593862d5eSSunil Mushran 
89693862d5eSSunil Mushran 	clen = 0;
89793862d5eSSunil Mushran 	cpos = *offset >> cs_bits;
898f17c20ddSJunxiao Bi 	cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
89993862d5eSSunil Mushran 
90093862d5eSSunil Mushran 	while (cpos < cend && !is_last) {
90193862d5eSSunil Mushran 		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
90293862d5eSSunil Mushran 						 &rec, &is_last);
90393862d5eSSunil Mushran 		if (ret) {
90493862d5eSSunil Mushran 			mlog_errno(ret);
90593862d5eSSunil Mushran 			goto out_unlock;
90693862d5eSSunil Mushran 		}
90793862d5eSSunil Mushran 
90893862d5eSSunil Mushran 		extoff = cpos;
90993862d5eSSunil Mushran 		extoff <<= cs_bits;
91093862d5eSSunil Mushran 
91193862d5eSSunil Mushran 		if (rec.e_blkno == 0ULL) {
91293862d5eSSunil Mushran 			clen = hole_size;
91393862d5eSSunil Mushran 			is_data = 0;
91493862d5eSSunil Mushran 		} else {
91593862d5eSSunil Mushran 			clen = le16_to_cpu(rec.e_leaf_clusters) -
91693862d5eSSunil Mushran 				(cpos - le32_to_cpu(rec.e_cpos));
91793862d5eSSunil Mushran 			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
91893862d5eSSunil Mushran 		}
91993862d5eSSunil Mushran 
920965c8e59SAndrew Morton 		if ((!is_data && whence == SEEK_HOLE) ||
921965c8e59SAndrew Morton 		    (is_data && whence == SEEK_DATA)) {
92293862d5eSSunil Mushran 			if (extoff > *offset)
92393862d5eSSunil Mushran 				*offset = extoff;
92493862d5eSSunil Mushran 			goto out_unlock;
92593862d5eSSunil Mushran 		}
92693862d5eSSunil Mushran 
92793862d5eSSunil Mushran 		if (!is_last)
92893862d5eSSunil Mushran 			cpos += clen;
92993862d5eSSunil Mushran 	}
93093862d5eSSunil Mushran 
931965c8e59SAndrew Morton 	if (whence == SEEK_HOLE) {
93293862d5eSSunil Mushran 		extoff = cpos;
93393862d5eSSunil Mushran 		extoff <<= cs_bits;
93493862d5eSSunil Mushran 		extlen = clen;
93593862d5eSSunil Mushran 		extlen <<=  cs_bits;
93693862d5eSSunil Mushran 
937f17c20ddSJunxiao Bi 		if ((extoff + extlen) > i_size_read(inode))
938f17c20ddSJunxiao Bi 			extlen = i_size_read(inode) - extoff;
93993862d5eSSunil Mushran 		extoff += extlen;
94093862d5eSSunil Mushran 		if (extoff > *offset)
94193862d5eSSunil Mushran 			*offset = extoff;
94293862d5eSSunil Mushran 		goto out_unlock;
94393862d5eSSunil Mushran 	}
94493862d5eSSunil Mushran 
94593862d5eSSunil Mushran 	ret = -ENXIO;
94693862d5eSSunil Mushran 
94793862d5eSSunil Mushran out_unlock:
94893862d5eSSunil Mushran 
94993862d5eSSunil Mushran 	brelse(di_bh);
95093862d5eSSunil Mushran 
95193862d5eSSunil Mushran 	up_read(&OCFS2_I(inode)->ip_alloc_sem);
95293862d5eSSunil Mushran 
95393862d5eSSunil Mushran 	ocfs2_inode_unlock(inode, 0);
95493862d5eSSunil Mushran out:
95593862d5eSSunil Mushran 	return ret;
95693862d5eSSunil Mushran }
95793862d5eSSunil Mushran 
958a8549fb5SJoel Becker int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
959a8549fb5SJoel Becker 			   struct buffer_head *bhs[], int flags,
960a8549fb5SJoel Becker 			   int (*validate)(struct super_block *sb,
961a8549fb5SJoel Becker 					   struct buffer_head *bh))
962a8549fb5SJoel Becker {
963a8549fb5SJoel Becker 	int rc = 0;
964a8549fb5SJoel Becker 	u64 p_block, p_count;
965a8549fb5SJoel Becker 	int i, count, done = 0;
966a8549fb5SJoel Becker 
967a716357cSTao Ma 	trace_ocfs2_read_virt_blocks(
968a8549fb5SJoel Becker 	     inode, (unsigned long long)v_block, nr, bhs, flags,
969a8549fb5SJoel Becker 	     validate);
970a8549fb5SJoel Becker 
971a8549fb5SJoel Becker 	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
972a8549fb5SJoel Becker 	    i_size_read(inode)) {
973a8549fb5SJoel Becker 		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
974a8549fb5SJoel Becker 		goto out;
975a8549fb5SJoel Becker 	}
976a8549fb5SJoel Becker 
977a8549fb5SJoel Becker 	while (done < nr) {
978a8549fb5SJoel Becker 		down_read(&OCFS2_I(inode)->ip_alloc_sem);
979a8549fb5SJoel Becker 		rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
980a8549fb5SJoel Becker 						 &p_block, &p_count, NULL);
981a8549fb5SJoel Becker 		up_read(&OCFS2_I(inode)->ip_alloc_sem);
982a8549fb5SJoel Becker 		if (rc) {
983a8549fb5SJoel Becker 			mlog_errno(rc);
984a8549fb5SJoel Becker 			break;
985a8549fb5SJoel Becker 		}
986a8549fb5SJoel Becker 
987a8549fb5SJoel Becker 		if (!p_block) {
988a8549fb5SJoel Becker 			rc = -EIO;
989a8549fb5SJoel Becker 			mlog(ML_ERROR,
990a8549fb5SJoel Becker 			     "Inode #%llu contains a hole at offset %llu\n",
991a8549fb5SJoel Becker 			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
992a8549fb5SJoel Becker 			     (unsigned long long)(v_block + done) <<
993a8549fb5SJoel Becker 			     inode->i_sb->s_blocksize_bits);
994a8549fb5SJoel Becker 			break;
995a8549fb5SJoel Becker 		}
996a8549fb5SJoel Becker 
997a8549fb5SJoel Becker 		count = nr - done;
998a8549fb5SJoel Becker 		if (p_count < count)
999a8549fb5SJoel Becker 			count = p_count;
1000a8549fb5SJoel Becker 
1001a8549fb5SJoel Becker 		/*
1002a8549fb5SJoel Becker 		 * If the caller passed us bhs, they should have come
1003a8549fb5SJoel Becker 		 * from a previous readahead call to this function.  Thus,
1004a8549fb5SJoel Becker 		 * they should have the right b_blocknr.
1005a8549fb5SJoel Becker 		 */
1006a8549fb5SJoel Becker 		for (i = 0; i < count; i++) {
1007a8549fb5SJoel Becker 			if (!bhs[done + i])
1008a8549fb5SJoel Becker 				continue;
1009a8549fb5SJoel Becker 			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
1010a8549fb5SJoel Becker 		}
1011a8549fb5SJoel Becker 
10128cb471e8SJoel Becker 		rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
10138cb471e8SJoel Becker 				       bhs + done, flags, validate);
1014a8549fb5SJoel Becker 		if (rc) {
1015a8549fb5SJoel Becker 			mlog_errno(rc);
1016a8549fb5SJoel Becker 			break;
1017a8549fb5SJoel Becker 		}
1018a8549fb5SJoel Becker 		done += count;
1019a8549fb5SJoel Becker 	}
1020a8549fb5SJoel Becker 
1021a8549fb5SJoel Becker out:
1022a8549fb5SJoel Becker 	return rc;
1023a8549fb5SJoel Becker }
1024a8549fb5SJoel Becker 
1025a8549fb5SJoel Becker 
1026