xref: /linux/fs/xfs/libxfs/xfs_rmap.c (revision b477ff98d903618a1ab8247861f2ea6e70c0f0f8)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2014 Red Hat, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_mount.h"
14 #include "xfs_sb.h"
15 #include "xfs_defer.h"
16 #include "xfs_btree.h"
17 #include "xfs_trans.h"
18 #include "xfs_alloc.h"
19 #include "xfs_rmap.h"
20 #include "xfs_rmap_btree.h"
21 #include "xfs_trace.h"
22 #include "xfs_errortag.h"
23 #include "xfs_error.h"
24 #include "xfs_inode.h"
25 #include "xfs_ag.h"
26 #include "xfs_health.h"
27 #include "xfs_rmap_item.h"
28 #include "xfs_rtgroup.h"
29 #include "xfs_rtrmap_btree.h"
30 
31 struct kmem_cache	*xfs_rmap_intent_cache;
32 
33 /*
34  * Lookup the first record less than or equal to [bno, len, owner, offset]
35  * in the btree given by cur.
36  */
37 int
xfs_rmap_lookup_le(struct xfs_btree_cur * cur,xfs_agblock_t bno,uint64_t owner,uint64_t offset,unsigned int flags,struct xfs_rmap_irec * irec,int * stat)38 xfs_rmap_lookup_le(
39 	struct xfs_btree_cur	*cur,
40 	xfs_agblock_t		bno,
41 	uint64_t		owner,
42 	uint64_t		offset,
43 	unsigned int		flags,
44 	struct xfs_rmap_irec	*irec,
45 	int			*stat)
46 {
47 	int			get_stat = 0;
48 	int			error;
49 
50 	cur->bc_rec.r.rm_startblock = bno;
51 	cur->bc_rec.r.rm_blockcount = 0;
52 	cur->bc_rec.r.rm_owner = owner;
53 	cur->bc_rec.r.rm_offset = offset;
54 	cur->bc_rec.r.rm_flags = flags;
55 
56 	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
57 	if (error || !(*stat) || !irec)
58 		return error;
59 
60 	error = xfs_rmap_get_rec(cur, irec, &get_stat);
61 	if (error)
62 		return error;
63 	if (!get_stat) {
64 		xfs_btree_mark_sick(cur);
65 		return -EFSCORRUPTED;
66 	}
67 
68 	return 0;
69 }
70 
71 /*
72  * Lookup the record exactly matching [bno, len, owner, offset]
73  * in the btree given by cur.
74  */
75 int
xfs_rmap_lookup_eq(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,uint64_t owner,uint64_t offset,unsigned int flags,int * stat)76 xfs_rmap_lookup_eq(
77 	struct xfs_btree_cur	*cur,
78 	xfs_agblock_t		bno,
79 	xfs_extlen_t		len,
80 	uint64_t		owner,
81 	uint64_t		offset,
82 	unsigned int		flags,
83 	int			*stat)
84 {
85 	cur->bc_rec.r.rm_startblock = bno;
86 	cur->bc_rec.r.rm_blockcount = len;
87 	cur->bc_rec.r.rm_owner = owner;
88 	cur->bc_rec.r.rm_offset = offset;
89 	cur->bc_rec.r.rm_flags = flags;
90 	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
91 }
92 
93 /*
94  * Update the record referred to by cur to the value given
95  * by [bno, len, owner, offset].
96  * This either works (return 0) or gets an EFSCORRUPTED error.
97  */
98 STATIC int
xfs_rmap_update(struct xfs_btree_cur * cur,struct xfs_rmap_irec * irec)99 xfs_rmap_update(
100 	struct xfs_btree_cur	*cur,
101 	struct xfs_rmap_irec	*irec)
102 {
103 	union xfs_btree_rec	rec;
104 	int			error;
105 
106 	trace_xfs_rmap_update(cur, irec->rm_startblock, irec->rm_blockcount,
107 			irec->rm_owner, irec->rm_offset, irec->rm_flags);
108 
109 	rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
110 	rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
111 	rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
112 	rec.rmap.rm_offset = cpu_to_be64(
113 			xfs_rmap_irec_offset_pack(irec));
114 	error = xfs_btree_update(cur, &rec);
115 	if (error)
116 		trace_xfs_rmap_update_error(cur, error, _RET_IP_);
117 	return error;
118 }
119 
120 int
xfs_rmap_insert(struct xfs_btree_cur * rcur,xfs_agblock_t agbno,xfs_extlen_t len,uint64_t owner,uint64_t offset,unsigned int flags)121 xfs_rmap_insert(
122 	struct xfs_btree_cur	*rcur,
123 	xfs_agblock_t		agbno,
124 	xfs_extlen_t		len,
125 	uint64_t		owner,
126 	uint64_t		offset,
127 	unsigned int		flags)
128 {
129 	int			i;
130 	int			error;
131 
132 	trace_xfs_rmap_insert(rcur, agbno, len, owner, offset, flags);
133 
134 	error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
135 	if (error)
136 		goto done;
137 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 0)) {
138 		xfs_btree_mark_sick(rcur);
139 		error = -EFSCORRUPTED;
140 		goto done;
141 	}
142 
143 	rcur->bc_rec.r.rm_startblock = agbno;
144 	rcur->bc_rec.r.rm_blockcount = len;
145 	rcur->bc_rec.r.rm_owner = owner;
146 	rcur->bc_rec.r.rm_offset = offset;
147 	rcur->bc_rec.r.rm_flags = flags;
148 	error = xfs_btree_insert(rcur, &i);
149 	if (error)
150 		goto done;
151 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
152 		xfs_btree_mark_sick(rcur);
153 		error = -EFSCORRUPTED;
154 		goto done;
155 	}
156 done:
157 	if (error)
158 		trace_xfs_rmap_insert_error(rcur, error, _RET_IP_);
159 	return error;
160 }
161 
162 STATIC int
xfs_rmap_delete(struct xfs_btree_cur * rcur,xfs_agblock_t agbno,xfs_extlen_t len,uint64_t owner,uint64_t offset,unsigned int flags)163 xfs_rmap_delete(
164 	struct xfs_btree_cur	*rcur,
165 	xfs_agblock_t		agbno,
166 	xfs_extlen_t		len,
167 	uint64_t		owner,
168 	uint64_t		offset,
169 	unsigned int		flags)
170 {
171 	int			i;
172 	int			error;
173 
174 	trace_xfs_rmap_delete(rcur, agbno, len, owner, offset, flags);
175 
176 	error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
177 	if (error)
178 		goto done;
179 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
180 		xfs_btree_mark_sick(rcur);
181 		error = -EFSCORRUPTED;
182 		goto done;
183 	}
184 
185 	error = xfs_btree_delete(rcur, &i);
186 	if (error)
187 		goto done;
188 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
189 		xfs_btree_mark_sick(rcur);
190 		error = -EFSCORRUPTED;
191 		goto done;
192 	}
193 done:
194 	if (error)
195 		trace_xfs_rmap_delete_error(rcur, error, _RET_IP_);
196 	return error;
197 }
198 
199 /* Convert an internal btree record to an rmap record. */
200 xfs_failaddr_t
xfs_rmap_btrec_to_irec(const union xfs_btree_rec * rec,struct xfs_rmap_irec * irec)201 xfs_rmap_btrec_to_irec(
202 	const union xfs_btree_rec	*rec,
203 	struct xfs_rmap_irec		*irec)
204 {
205 	irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
206 	irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
207 	irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner);
208 	return xfs_rmap_irec_offset_unpack(be64_to_cpu(rec->rmap.rm_offset),
209 			irec);
210 }
211 
212 /* Simple checks for rmap records. */
213 xfs_failaddr_t
xfs_rmap_check_irec(struct xfs_perag * pag,const struct xfs_rmap_irec * irec)214 xfs_rmap_check_irec(
215 	struct xfs_perag		*pag,
216 	const struct xfs_rmap_irec	*irec)
217 {
218 	struct xfs_mount		*mp = pag_mount(pag);
219 	bool				is_inode;
220 	bool				is_unwritten;
221 	bool				is_bmbt;
222 	bool				is_attr;
223 
224 	if (irec->rm_blockcount == 0)
225 		return __this_address;
226 	if (irec->rm_startblock <= XFS_AGFL_BLOCK(mp)) {
227 		if (irec->rm_owner != XFS_RMAP_OWN_FS)
228 			return __this_address;
229 		if (irec->rm_blockcount != XFS_AGFL_BLOCK(mp) + 1)
230 			return __this_address;
231 	} else {
232 		/* check for valid extent range, including overflow */
233 		if (!xfs_verify_agbext(pag, irec->rm_startblock,
234 					    irec->rm_blockcount))
235 			return __this_address;
236 	}
237 
238 	if (!(xfs_verify_ino(mp, irec->rm_owner) ||
239 	      (irec->rm_owner <= XFS_RMAP_OWN_FS &&
240 	       irec->rm_owner >= XFS_RMAP_OWN_MIN)))
241 		return __this_address;
242 
243 	/* Check flags. */
244 	is_inode = !XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
245 	is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK;
246 	is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK;
247 	is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN;
248 
249 	if (is_bmbt && irec->rm_offset != 0)
250 		return __this_address;
251 
252 	if (!is_inode && irec->rm_offset != 0)
253 		return __this_address;
254 
255 	if (is_unwritten && (is_bmbt || !is_inode || is_attr))
256 		return __this_address;
257 
258 	if (!is_inode && (is_bmbt || is_unwritten || is_attr))
259 		return __this_address;
260 
261 	/* Check for a valid fork offset, if applicable. */
262 	if (is_inode && !is_bmbt &&
263 	    !xfs_verify_fileext(mp, irec->rm_offset, irec->rm_blockcount))
264 		return __this_address;
265 
266 	return NULL;
267 }
268 
269 static xfs_failaddr_t
xfs_rtrmap_check_meta_irec(struct xfs_rtgroup * rtg,const struct xfs_rmap_irec * irec)270 xfs_rtrmap_check_meta_irec(
271 	struct xfs_rtgroup		*rtg,
272 	const struct xfs_rmap_irec	*irec)
273 {
274 	struct xfs_mount		*mp = rtg_mount(rtg);
275 
276 	if (irec->rm_offset != 0)
277 		return __this_address;
278 	if (irec->rm_flags & XFS_RMAP_UNWRITTEN)
279 		return __this_address;
280 
281 	switch (irec->rm_owner) {
282 	case XFS_RMAP_OWN_FS:
283 		if (irec->rm_startblock != 0)
284 			return __this_address;
285 		if (irec->rm_blockcount != mp->m_sb.sb_rextsize)
286 			return __this_address;
287 		return NULL;
288 	case XFS_RMAP_OWN_COW:
289 		if (!xfs_has_rtreflink(mp))
290 			return __this_address;
291 		if (!xfs_verify_rgbext(rtg, irec->rm_startblock,
292 					    irec->rm_blockcount))
293 			return __this_address;
294 		return NULL;
295 	default:
296 		return __this_address;
297 	}
298 
299 	return NULL;
300 }
301 
302 static xfs_failaddr_t
xfs_rtrmap_check_inode_irec(struct xfs_rtgroup * rtg,const struct xfs_rmap_irec * irec)303 xfs_rtrmap_check_inode_irec(
304 	struct xfs_rtgroup		*rtg,
305 	const struct xfs_rmap_irec	*irec)
306 {
307 	struct xfs_mount		*mp = rtg_mount(rtg);
308 
309 	if (!xfs_verify_ino(mp, irec->rm_owner))
310 		return __this_address;
311 	if (!xfs_verify_rgbext(rtg, irec->rm_startblock, irec->rm_blockcount))
312 		return __this_address;
313 	if (!xfs_verify_fileext(mp, irec->rm_offset, irec->rm_blockcount))
314 		return __this_address;
315 	return NULL;
316 }
317 
318 xfs_failaddr_t
xfs_rtrmap_check_irec(struct xfs_rtgroup * rtg,const struct xfs_rmap_irec * irec)319 xfs_rtrmap_check_irec(
320 	struct xfs_rtgroup		*rtg,
321 	const struct xfs_rmap_irec	*irec)
322 {
323 	if (irec->rm_blockcount == 0)
324 		return __this_address;
325 	if (irec->rm_flags & (XFS_RMAP_BMBT_BLOCK | XFS_RMAP_ATTR_FORK))
326 		return __this_address;
327 	if (XFS_RMAP_NON_INODE_OWNER(irec->rm_owner))
328 		return xfs_rtrmap_check_meta_irec(rtg, irec);
329 	return xfs_rtrmap_check_inode_irec(rtg, irec);
330 }
331 
332 static inline xfs_failaddr_t
xfs_rmap_check_btrec(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * irec)333 xfs_rmap_check_btrec(
334 	struct xfs_btree_cur		*cur,
335 	const struct xfs_rmap_irec	*irec)
336 {
337 	if (xfs_btree_is_rtrmap(cur->bc_ops) ||
338 	    xfs_btree_is_mem_rtrmap(cur->bc_ops))
339 		return xfs_rtrmap_check_irec(to_rtg(cur->bc_group), irec);
340 	return xfs_rmap_check_irec(to_perag(cur->bc_group), irec);
341 }
342 
343 static inline int
xfs_rmap_complain_bad_rec(struct xfs_btree_cur * cur,xfs_failaddr_t fa,const struct xfs_rmap_irec * irec)344 xfs_rmap_complain_bad_rec(
345 	struct xfs_btree_cur		*cur,
346 	xfs_failaddr_t			fa,
347 	const struct xfs_rmap_irec	*irec)
348 {
349 	struct xfs_mount		*mp = cur->bc_mp;
350 
351 	if (xfs_btree_is_mem_rmap(cur->bc_ops))
352 		xfs_warn(mp,
353  "In-Memory Reverse Mapping BTree record corruption detected at %pS!", fa);
354 	else if (xfs_btree_is_rtrmap(cur->bc_ops))
355 		xfs_warn(mp,
356  "RT Reverse Mapping BTree record corruption in rtgroup %u detected at %pS!",
357 				cur->bc_group->xg_gno, fa);
358 	else
359 		xfs_warn(mp,
360  "Reverse Mapping BTree record corruption in AG %d detected at %pS!",
361 			cur->bc_group->xg_gno, fa);
362 	xfs_warn(mp,
363 		"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
364 		irec->rm_owner, irec->rm_flags, irec->rm_startblock,
365 		irec->rm_blockcount);
366 	xfs_btree_mark_sick(cur);
367 	return -EFSCORRUPTED;
368 }
369 
370 /*
371  * Get the data from the pointed-to record.
372  */
373 int
xfs_rmap_get_rec(struct xfs_btree_cur * cur,struct xfs_rmap_irec * irec,int * stat)374 xfs_rmap_get_rec(
375 	struct xfs_btree_cur	*cur,
376 	struct xfs_rmap_irec	*irec,
377 	int			*stat)
378 {
379 	union xfs_btree_rec	*rec;
380 	xfs_failaddr_t		fa;
381 	int			error;
382 
383 	error = xfs_btree_get_rec(cur, &rec, stat);
384 	if (error || !*stat)
385 		return error;
386 
387 	fa = xfs_rmap_btrec_to_irec(rec, irec);
388 	if (!fa)
389 		fa = xfs_rmap_check_btrec(cur, irec);
390 	if (fa)
391 		return xfs_rmap_complain_bad_rec(cur, fa, irec);
392 
393 	return 0;
394 }
395 
396 struct xfs_find_left_neighbor_info {
397 	struct xfs_rmap_irec	high;
398 	struct xfs_rmap_irec	*irec;
399 };
400 
401 /* For each rmap given, figure out if it matches the key we want. */
402 STATIC int
xfs_rmap_find_left_neighbor_helper(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)403 xfs_rmap_find_left_neighbor_helper(
404 	struct xfs_btree_cur		*cur,
405 	const struct xfs_rmap_irec	*rec,
406 	void				*priv)
407 {
408 	struct xfs_find_left_neighbor_info	*info = priv;
409 
410 	trace_xfs_rmap_find_left_neighbor_candidate(cur, rec->rm_startblock,
411 			rec->rm_blockcount, rec->rm_owner, rec->rm_offset,
412 			rec->rm_flags);
413 
414 	if (rec->rm_owner != info->high.rm_owner)
415 		return 0;
416 	if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) &&
417 	    !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
418 	    rec->rm_offset + rec->rm_blockcount - 1 != info->high.rm_offset)
419 		return 0;
420 
421 	*info->irec = *rec;
422 	return -ECANCELED;
423 }
424 
425 /*
426  * Find the record to the left of the given extent, being careful only to
427  * return a match with the same owner and adjacent physical and logical
428  * block ranges.
429  */
430 STATIC int
xfs_rmap_find_left_neighbor(struct xfs_btree_cur * cur,xfs_agblock_t bno,uint64_t owner,uint64_t offset,unsigned int flags,struct xfs_rmap_irec * irec,int * stat)431 xfs_rmap_find_left_neighbor(
432 	struct xfs_btree_cur	*cur,
433 	xfs_agblock_t		bno,
434 	uint64_t		owner,
435 	uint64_t		offset,
436 	unsigned int		flags,
437 	struct xfs_rmap_irec	*irec,
438 	int			*stat)
439 {
440 	struct xfs_find_left_neighbor_info	info;
441 	int			found = 0;
442 	int			error;
443 
444 	*stat = 0;
445 	if (bno == 0)
446 		return 0;
447 	info.high.rm_startblock = bno - 1;
448 	info.high.rm_owner = owner;
449 	if (!XFS_RMAP_NON_INODE_OWNER(owner) &&
450 	    !(flags & XFS_RMAP_BMBT_BLOCK)) {
451 		if (offset == 0)
452 			return 0;
453 		info.high.rm_offset = offset - 1;
454 	} else
455 		info.high.rm_offset = 0;
456 	info.high.rm_flags = flags;
457 	info.high.rm_blockcount = 0;
458 	info.irec = irec;
459 
460 	trace_xfs_rmap_find_left_neighbor_query(cur, bno, 0, owner, offset,
461 			flags);
462 
463 	/*
464 	 * Historically, we always used the range query to walk every reverse
465 	 * mapping that could possibly overlap the key that the caller asked
466 	 * for, and filter out the ones that don't.  That is very slow when
467 	 * there are a lot of records.
468 	 *
469 	 * However, there are two scenarios where the classic btree search can
470 	 * produce correct results -- if the index contains a record that is an
471 	 * exact match for the lookup key; and if there are no other records
472 	 * between the record we want and the key we supplied.
473 	 *
474 	 * As an optimization, try a non-overlapped lookup first.  This makes
475 	 * extent conversion and remap operations run a bit faster if the
476 	 * physical extents aren't being shared.  If we don't find what we
477 	 * want, we fall back to the overlapped query.
478 	 */
479 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
480 			&found);
481 	if (error)
482 		return error;
483 	if (found)
484 		error = xfs_rmap_find_left_neighbor_helper(cur, irec, &info);
485 	if (!error)
486 		error = xfs_rmap_query_range(cur, &info.high, &info.high,
487 				xfs_rmap_find_left_neighbor_helper, &info);
488 	if (error != -ECANCELED)
489 		return error;
490 
491 	*stat = 1;
492 	trace_xfs_rmap_find_left_neighbor_result(cur, irec->rm_startblock,
493 			irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
494 			irec->rm_flags);
495 	return 0;
496 }
497 
498 /* For each rmap given, figure out if it matches the key we want. */
499 STATIC int
xfs_rmap_lookup_le_range_helper(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)500 xfs_rmap_lookup_le_range_helper(
501 	struct xfs_btree_cur		*cur,
502 	const struct xfs_rmap_irec	*rec,
503 	void				*priv)
504 {
505 	struct xfs_find_left_neighbor_info	*info = priv;
506 
507 	trace_xfs_rmap_lookup_le_range_candidate(cur, rec->rm_startblock,
508 			rec->rm_blockcount, rec->rm_owner, rec->rm_offset,
509 			rec->rm_flags);
510 
511 	if (rec->rm_owner != info->high.rm_owner)
512 		return 0;
513 	if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) &&
514 	    !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
515 	    (rec->rm_offset > info->high.rm_offset ||
516 	     rec->rm_offset + rec->rm_blockcount <= info->high.rm_offset))
517 		return 0;
518 
519 	*info->irec = *rec;
520 	return -ECANCELED;
521 }
522 
523 /*
524  * Find the record to the left of the given extent, being careful only to
525  * return a match with the same owner and overlapping physical and logical
526  * block ranges.  This is the overlapping-interval version of
527  * xfs_rmap_lookup_le.
528  */
529 int
xfs_rmap_lookup_le_range(struct xfs_btree_cur * cur,xfs_agblock_t bno,uint64_t owner,uint64_t offset,unsigned int flags,struct xfs_rmap_irec * irec,int * stat)530 xfs_rmap_lookup_le_range(
531 	struct xfs_btree_cur	*cur,
532 	xfs_agblock_t		bno,
533 	uint64_t		owner,
534 	uint64_t		offset,
535 	unsigned int		flags,
536 	struct xfs_rmap_irec	*irec,
537 	int			*stat)
538 {
539 	struct xfs_find_left_neighbor_info	info;
540 	int			found = 0;
541 	int			error;
542 
543 	info.high.rm_startblock = bno;
544 	info.high.rm_owner = owner;
545 	if (!XFS_RMAP_NON_INODE_OWNER(owner) && !(flags & XFS_RMAP_BMBT_BLOCK))
546 		info.high.rm_offset = offset;
547 	else
548 		info.high.rm_offset = 0;
549 	info.high.rm_flags = flags;
550 	info.high.rm_blockcount = 0;
551 	*stat = 0;
552 	info.irec = irec;
553 
554 	trace_xfs_rmap_lookup_le_range(cur, bno, 0, owner, offset, flags);
555 
556 	/*
557 	 * Historically, we always used the range query to walk every reverse
558 	 * mapping that could possibly overlap the key that the caller asked
559 	 * for, and filter out the ones that don't.  That is very slow when
560 	 * there are a lot of records.
561 	 *
562 	 * However, there are two scenarios where the classic btree search can
563 	 * produce correct results -- if the index contains a record that is an
564 	 * exact match for the lookup key; and if there are no other records
565 	 * between the record we want and the key we supplied.
566 	 *
567 	 * As an optimization, try a non-overlapped lookup first.  This makes
568 	 * scrub run much faster on most filesystems because bmbt records are
569 	 * usually an exact match for rmap records.  If we don't find what we
570 	 * want, we fall back to the overlapped query.
571 	 */
572 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
573 			&found);
574 	if (error)
575 		return error;
576 	if (found)
577 		error = xfs_rmap_lookup_le_range_helper(cur, irec, &info);
578 	if (!error)
579 		error = xfs_rmap_query_range(cur, &info.high, &info.high,
580 				xfs_rmap_lookup_le_range_helper, &info);
581 	if (error != -ECANCELED)
582 		return error;
583 
584 	*stat = 1;
585 	trace_xfs_rmap_lookup_le_range_result(cur, irec->rm_startblock,
586 			irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
587 			irec->rm_flags);
588 	return 0;
589 }
590 
591 /*
592  * Perform all the relevant owner checks for a removal op.  If we're doing an
593  * unknown-owner removal then we have no owner information to check.
594  */
595 static int
xfs_rmap_free_check_owner(struct xfs_btree_cur * cur,uint64_t ltoff,struct xfs_rmap_irec * rec,xfs_extlen_t len,uint64_t owner,uint64_t offset,unsigned int flags)596 xfs_rmap_free_check_owner(
597 	struct xfs_btree_cur	*cur,
598 	uint64_t		ltoff,
599 	struct xfs_rmap_irec	*rec,
600 	xfs_extlen_t		len,
601 	uint64_t		owner,
602 	uint64_t		offset,
603 	unsigned int		flags)
604 {
605 	struct xfs_mount	*mp = cur->bc_mp;
606 	int			error = 0;
607 
608 	if (owner == XFS_RMAP_OWN_UNKNOWN)
609 		return 0;
610 
611 	/* Make sure the unwritten flag matches. */
612 	if (XFS_IS_CORRUPT(mp,
613 			   (flags & XFS_RMAP_UNWRITTEN) !=
614 			   (rec->rm_flags & XFS_RMAP_UNWRITTEN))) {
615 		xfs_btree_mark_sick(cur);
616 		error = -EFSCORRUPTED;
617 		goto out;
618 	}
619 
620 	/* Make sure the owner matches what we expect to find in the tree. */
621 	if (XFS_IS_CORRUPT(mp, owner != rec->rm_owner)) {
622 		xfs_btree_mark_sick(cur);
623 		error = -EFSCORRUPTED;
624 		goto out;
625 	}
626 
627 	/* Check the offset, if necessary. */
628 	if (XFS_RMAP_NON_INODE_OWNER(owner))
629 		goto out;
630 
631 	if (flags & XFS_RMAP_BMBT_BLOCK) {
632 		if (XFS_IS_CORRUPT(mp,
633 				   !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))) {
634 			xfs_btree_mark_sick(cur);
635 			error = -EFSCORRUPTED;
636 			goto out;
637 		}
638 	} else {
639 		if (XFS_IS_CORRUPT(mp, rec->rm_offset > offset)) {
640 			xfs_btree_mark_sick(cur);
641 			error = -EFSCORRUPTED;
642 			goto out;
643 		}
644 		if (XFS_IS_CORRUPT(mp,
645 				   offset + len > ltoff + rec->rm_blockcount)) {
646 			xfs_btree_mark_sick(cur);
647 			error = -EFSCORRUPTED;
648 			goto out;
649 		}
650 	}
651 
652 out:
653 	return error;
654 }
655 
656 /*
657  * Find the extent in the rmap btree and remove it.
658  *
659  * The record we find should always be an exact match for the extent that we're
660  * looking for, since we insert them into the btree without modification.
661  *
662  * Special Case #1: when growing the filesystem, we "free" an extent when
663  * growing the last AG. This extent is new space and so it is not tracked as
664  * used space in the btree. The growfs code will pass in an owner of
665  * XFS_RMAP_OWN_NULL to indicate that it expected that there is no owner of this
666  * extent. We verify that - the extent lookup result in a record that does not
667  * overlap.
668  *
669  * Special Case #2: EFIs do not record the owner of the extent, so when
670  * recovering EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap
671  * btree to ignore the owner (i.e. wildcard match) so we don't trigger
672  * corruption checks during log recovery.
673  */
674 STATIC int
xfs_rmap_unmap(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,bool unwritten,const struct xfs_owner_info * oinfo)675 xfs_rmap_unmap(
676 	struct xfs_btree_cur		*cur,
677 	xfs_agblock_t			bno,
678 	xfs_extlen_t			len,
679 	bool				unwritten,
680 	const struct xfs_owner_info	*oinfo)
681 {
682 	struct xfs_mount		*mp = cur->bc_mp;
683 	struct xfs_rmap_irec		ltrec;
684 	uint64_t			ltoff;
685 	int				error = 0;
686 	int				i;
687 	uint64_t			owner;
688 	uint64_t			offset;
689 	unsigned int			flags;
690 	bool				ignore_off;
691 
692 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
693 	ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
694 			(flags & XFS_RMAP_BMBT_BLOCK);
695 	if (unwritten)
696 		flags |= XFS_RMAP_UNWRITTEN;
697 	trace_xfs_rmap_unmap(cur, bno, len, unwritten, oinfo);
698 
699 	/*
700 	 * We should always have a left record because there's a static record
701 	 * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
702 	 * will not ever be removed from the tree.
703 	 */
704 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec, &i);
705 	if (error)
706 		goto out_error;
707 	if (XFS_IS_CORRUPT(mp, i != 1)) {
708 		xfs_btree_mark_sick(cur);
709 		error = -EFSCORRUPTED;
710 		goto out_error;
711 	}
712 
713 	trace_xfs_rmap_lookup_le_range_result(cur, ltrec.rm_startblock,
714 			ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset,
715 			ltrec.rm_flags);
716 	ltoff = ltrec.rm_offset;
717 
718 	/*
719 	 * For growfs, the incoming extent must be beyond the left record we
720 	 * just found as it is new space and won't be used by anyone. This is
721 	 * just a corruption check as we don't actually do anything with this
722 	 * extent.  Note that we need to use >= instead of > because it might
723 	 * be the case that the "left" extent goes all the way to EOFS.
724 	 */
725 	if (owner == XFS_RMAP_OWN_NULL) {
726 		if (XFS_IS_CORRUPT(mp,
727 				   bno <
728 				   ltrec.rm_startblock + ltrec.rm_blockcount)) {
729 			xfs_btree_mark_sick(cur);
730 			error = -EFSCORRUPTED;
731 			goto out_error;
732 		}
733 		goto out_done;
734 	}
735 
736 	/*
737 	 * If we're doing an unknown-owner removal for EFI recovery, we expect
738 	 * to find the full range in the rmapbt or nothing at all.  If we
739 	 * don't find any rmaps overlapping either end of the range, we're
740 	 * done.  Hopefully this means that the EFI creator already queued
741 	 * (and finished) a RUI to remove the rmap.
742 	 */
743 	if (owner == XFS_RMAP_OWN_UNKNOWN &&
744 	    ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
745 		struct xfs_rmap_irec    rtrec;
746 
747 		error = xfs_btree_increment(cur, 0, &i);
748 		if (error)
749 			goto out_error;
750 		if (i == 0)
751 			goto out_done;
752 		error = xfs_rmap_get_rec(cur, &rtrec, &i);
753 		if (error)
754 			goto out_error;
755 		if (XFS_IS_CORRUPT(mp, i != 1)) {
756 			xfs_btree_mark_sick(cur);
757 			error = -EFSCORRUPTED;
758 			goto out_error;
759 		}
760 		if (rtrec.rm_startblock >= bno + len)
761 			goto out_done;
762 	}
763 
764 	/* Make sure the extent we found covers the entire freeing range. */
765 	if (XFS_IS_CORRUPT(mp,
766 			   ltrec.rm_startblock > bno ||
767 			   ltrec.rm_startblock + ltrec.rm_blockcount <
768 			   bno + len)) {
769 		xfs_btree_mark_sick(cur);
770 		error = -EFSCORRUPTED;
771 		goto out_error;
772 	}
773 
774 	/* Check owner information. */
775 	error = xfs_rmap_free_check_owner(cur, ltoff, &ltrec, len, owner,
776 			offset, flags);
777 	if (error)
778 		goto out_error;
779 
780 	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
781 		/* exact match, simply remove the record from rmap tree */
782 		trace_xfs_rmap_delete(cur, ltrec.rm_startblock,
783 				ltrec.rm_blockcount, ltrec.rm_owner,
784 				ltrec.rm_offset, ltrec.rm_flags);
785 		error = xfs_btree_delete(cur, &i);
786 		if (error)
787 			goto out_error;
788 		if (XFS_IS_CORRUPT(mp, i != 1)) {
789 			xfs_btree_mark_sick(cur);
790 			error = -EFSCORRUPTED;
791 			goto out_error;
792 		}
793 	} else if (ltrec.rm_startblock == bno) {
794 		/*
795 		 * overlap left hand side of extent: move the start, trim the
796 		 * length and update the current record.
797 		 *
798 		 *       ltbno                ltlen
799 		 * Orig:    |oooooooooooooooooooo|
800 		 * Freeing: |fffffffff|
801 		 * Result:            |rrrrrrrrrr|
802 		 *         bno       len
803 		 */
804 		ltrec.rm_startblock += len;
805 		ltrec.rm_blockcount -= len;
806 		if (!ignore_off)
807 			ltrec.rm_offset += len;
808 		error = xfs_rmap_update(cur, &ltrec);
809 		if (error)
810 			goto out_error;
811 	} else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
812 		/*
813 		 * overlap right hand side of extent: trim the length and update
814 		 * the current record.
815 		 *
816 		 *       ltbno                ltlen
817 		 * Orig:    |oooooooooooooooooooo|
818 		 * Freeing:            |fffffffff|
819 		 * Result:  |rrrrrrrrrr|
820 		 *                    bno       len
821 		 */
822 		ltrec.rm_blockcount -= len;
823 		error = xfs_rmap_update(cur, &ltrec);
824 		if (error)
825 			goto out_error;
826 	} else {
827 
828 		/*
829 		 * overlap middle of extent: trim the length of the existing
830 		 * record to the length of the new left-extent size, increment
831 		 * the insertion position so we can insert a new record
832 		 * containing the remaining right-extent space.
833 		 *
834 		 *       ltbno                ltlen
835 		 * Orig:    |oooooooooooooooooooo|
836 		 * Freeing:       |fffffffff|
837 		 * Result:  |rrrrr|         |rrrr|
838 		 *               bno       len
839 		 */
840 		xfs_extlen_t	orig_len = ltrec.rm_blockcount;
841 
842 		ltrec.rm_blockcount = bno - ltrec.rm_startblock;
843 		error = xfs_rmap_update(cur, &ltrec);
844 		if (error)
845 			goto out_error;
846 
847 		error = xfs_btree_increment(cur, 0, &i);
848 		if (error)
849 			goto out_error;
850 
851 		cur->bc_rec.r.rm_startblock = bno + len;
852 		cur->bc_rec.r.rm_blockcount = orig_len - len -
853 						     ltrec.rm_blockcount;
854 		cur->bc_rec.r.rm_owner = ltrec.rm_owner;
855 		if (ignore_off)
856 			cur->bc_rec.r.rm_offset = 0;
857 		else
858 			cur->bc_rec.r.rm_offset = offset + len;
859 		cur->bc_rec.r.rm_flags = flags;
860 		trace_xfs_rmap_insert(cur, cur->bc_rec.r.rm_startblock,
861 				cur->bc_rec.r.rm_blockcount,
862 				cur->bc_rec.r.rm_owner,
863 				cur->bc_rec.r.rm_offset,
864 				cur->bc_rec.r.rm_flags);
865 		error = xfs_btree_insert(cur, &i);
866 		if (error)
867 			goto out_error;
868 	}
869 
870 out_done:
871 	trace_xfs_rmap_unmap_done(cur, bno, len, unwritten, oinfo);
872 out_error:
873 	if (error)
874 		trace_xfs_rmap_unmap_error(cur, error, _RET_IP_);
875 	return error;
876 }
877 
878 #ifdef CONFIG_XFS_LIVE_HOOKS
879 /*
880  * Use a static key here to reduce the overhead of rmapbt live updates.  If
881  * the compiler supports jump labels, the static branch will be replaced by a
882  * nop sled when there are no hook users.  Online fsck is currently the only
883  * caller, so this is a reasonable tradeoff.
884  *
885  * Note: Patching the kernel code requires taking the cpu hotplug lock.  Other
886  * parts of the kernel allocate memory with that lock held, which means that
887  * XFS callers cannot hold any locks that might be used by memory reclaim or
888  * writeback when calling the static_branch_{inc,dec} functions.
889  */
890 DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_rmap_hooks_switch);
891 
892 void
xfs_rmap_hook_disable(void)893 xfs_rmap_hook_disable(void)
894 {
895 	xfs_hooks_switch_off(&xfs_rmap_hooks_switch);
896 }
897 
898 void
xfs_rmap_hook_enable(void)899 xfs_rmap_hook_enable(void)
900 {
901 	xfs_hooks_switch_on(&xfs_rmap_hooks_switch);
902 }
903 
904 /* Call downstream hooks for a reverse mapping update. */
905 static inline void
xfs_rmap_update_hook(struct xfs_trans * tp,struct xfs_group * xg,enum xfs_rmap_intent_type op,xfs_agblock_t startblock,xfs_extlen_t blockcount,bool unwritten,const struct xfs_owner_info * oinfo)906 xfs_rmap_update_hook(
907 	struct xfs_trans		*tp,
908 	struct xfs_group		*xg,
909 	enum xfs_rmap_intent_type	op,
910 	xfs_agblock_t			startblock,
911 	xfs_extlen_t			blockcount,
912 	bool				unwritten,
913 	const struct xfs_owner_info	*oinfo)
914 {
915 	if (xfs_hooks_switched_on(&xfs_rmap_hooks_switch)) {
916 		struct xfs_rmap_update_params	p = {
917 			.startblock	= startblock,
918 			.blockcount	= blockcount,
919 			.unwritten	= unwritten,
920 			.oinfo		= *oinfo, /* struct copy */
921 		};
922 
923 		if (xg)
924 			xfs_hooks_call(&xg->xg_rmap_update_hooks, op, &p);
925 	}
926 }
927 
928 /* Call the specified function during a reverse mapping update. */
929 int
xfs_rmap_hook_add(struct xfs_group * xg,struct xfs_rmap_hook * hook)930 xfs_rmap_hook_add(
931 	struct xfs_group	*xg,
932 	struct xfs_rmap_hook	*hook)
933 {
934 	return xfs_hooks_add(&xg->xg_rmap_update_hooks, &hook->rmap_hook);
935 }
936 
937 /* Stop calling the specified function during a reverse mapping update. */
938 void
xfs_rmap_hook_del(struct xfs_group * xg,struct xfs_rmap_hook * hook)939 xfs_rmap_hook_del(
940 	struct xfs_group	*xg,
941 	struct xfs_rmap_hook	*hook)
942 {
943 	xfs_hooks_del(&xg->xg_rmap_update_hooks, &hook->rmap_hook);
944 }
945 
946 /* Configure rmap update hook functions. */
947 void
xfs_rmap_hook_setup(struct xfs_rmap_hook * hook,notifier_fn_t mod_fn)948 xfs_rmap_hook_setup(
949 	struct xfs_rmap_hook	*hook,
950 	notifier_fn_t		mod_fn)
951 {
952 	xfs_hook_setup(&hook->rmap_hook, mod_fn);
953 }
954 #else
955 # define xfs_rmap_update_hook(t, p, o, s, b, u, oi)	do { } while (0)
956 #endif /* CONFIG_XFS_LIVE_HOOKS */
957 
958 /*
959  * Remove a reference to an extent in the rmap btree.
960  */
961 int
xfs_rmap_free(struct xfs_trans * tp,struct xfs_buf * agbp,struct xfs_perag * pag,xfs_agblock_t bno,xfs_extlen_t len,const struct xfs_owner_info * oinfo)962 xfs_rmap_free(
963 	struct xfs_trans		*tp,
964 	struct xfs_buf			*agbp,
965 	struct xfs_perag		*pag,
966 	xfs_agblock_t			bno,
967 	xfs_extlen_t			len,
968 	const struct xfs_owner_info	*oinfo)
969 {
970 	struct xfs_mount		*mp = tp->t_mountp;
971 	struct xfs_btree_cur		*cur;
972 	int				error;
973 
974 	if (!xfs_has_rmapbt(mp))
975 		return 0;
976 
977 	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
978 	xfs_rmap_update_hook(tp, pag_group(pag), XFS_RMAP_UNMAP, bno, len,
979 			false, oinfo);
980 	error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
981 
982 	xfs_btree_del_cursor(cur, error);
983 	return error;
984 }
985 
986 /*
987  * A mergeable rmap must have the same owner and the same values for
988  * the unwritten, attr_fork, and bmbt flags.  The startblock and
989  * offset are checked separately.
990  */
991 static bool
xfs_rmap_is_mergeable(struct xfs_rmap_irec * irec,uint64_t owner,unsigned int flags)992 xfs_rmap_is_mergeable(
993 	struct xfs_rmap_irec	*irec,
994 	uint64_t		owner,
995 	unsigned int		flags)
996 {
997 	if (irec->rm_owner == XFS_RMAP_OWN_NULL)
998 		return false;
999 	if (irec->rm_owner != owner)
1000 		return false;
1001 	if ((flags & XFS_RMAP_UNWRITTEN) ^
1002 	    (irec->rm_flags & XFS_RMAP_UNWRITTEN))
1003 		return false;
1004 	if ((flags & XFS_RMAP_ATTR_FORK) ^
1005 	    (irec->rm_flags & XFS_RMAP_ATTR_FORK))
1006 		return false;
1007 	if ((flags & XFS_RMAP_BMBT_BLOCK) ^
1008 	    (irec->rm_flags & XFS_RMAP_BMBT_BLOCK))
1009 		return false;
1010 	return true;
1011 }
1012 
1013 /*
1014  * When we allocate a new block, the first thing we do is add a reference to
1015  * the extent in the rmap btree. This takes the form of a [agbno, length,
1016  * owner, offset] record.  Flags are encoded in the high bits of the offset
1017  * field.
1018  */
1019 STATIC int
xfs_rmap_map(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,bool unwritten,const struct xfs_owner_info * oinfo)1020 xfs_rmap_map(
1021 	struct xfs_btree_cur		*cur,
1022 	xfs_agblock_t			bno,
1023 	xfs_extlen_t			len,
1024 	bool				unwritten,
1025 	const struct xfs_owner_info	*oinfo)
1026 {
1027 	struct xfs_mount		*mp = cur->bc_mp;
1028 	struct xfs_rmap_irec		ltrec;
1029 	struct xfs_rmap_irec		gtrec;
1030 	int				have_gt;
1031 	int				have_lt;
1032 	int				error = 0;
1033 	int				i;
1034 	uint64_t			owner;
1035 	uint64_t			offset;
1036 	unsigned int			flags = 0;
1037 	bool				ignore_off;
1038 
1039 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
1040 	ASSERT(owner != 0);
1041 	ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
1042 			(flags & XFS_RMAP_BMBT_BLOCK);
1043 	if (unwritten)
1044 		flags |= XFS_RMAP_UNWRITTEN;
1045 	trace_xfs_rmap_map(cur, bno, len, unwritten, oinfo);
1046 	ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
1047 
1048 	/*
1049 	 * For the initial lookup, look for an exact match or the left-adjacent
1050 	 * record for our insertion point. This will also give us the record for
1051 	 * start block contiguity tests.
1052 	 */
1053 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec,
1054 			&have_lt);
1055 	if (error)
1056 		goto out_error;
1057 	if (have_lt) {
1058 		trace_xfs_rmap_lookup_le_range_result(cur, ltrec.rm_startblock,
1059 				ltrec.rm_blockcount, ltrec.rm_owner,
1060 				ltrec.rm_offset, ltrec.rm_flags);
1061 
1062 		if (!xfs_rmap_is_mergeable(&ltrec, owner, flags))
1063 			have_lt = 0;
1064 	}
1065 
1066 	if (XFS_IS_CORRUPT(mp,
1067 			   have_lt != 0 &&
1068 			   ltrec.rm_startblock + ltrec.rm_blockcount > bno)) {
1069 		xfs_btree_mark_sick(cur);
1070 		error = -EFSCORRUPTED;
1071 		goto out_error;
1072 	}
1073 
1074 	/*
1075 	 * Increment the cursor to see if we have a right-adjacent record to our
1076 	 * insertion point. This will give us the record for end block
1077 	 * contiguity tests.
1078 	 */
1079 	error = xfs_btree_increment(cur, 0, &have_gt);
1080 	if (error)
1081 		goto out_error;
1082 	if (have_gt) {
1083 		error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
1084 		if (error)
1085 			goto out_error;
1086 		if (XFS_IS_CORRUPT(mp, have_gt != 1)) {
1087 			xfs_btree_mark_sick(cur);
1088 			error = -EFSCORRUPTED;
1089 			goto out_error;
1090 		}
1091 		if (XFS_IS_CORRUPT(mp, bno + len > gtrec.rm_startblock)) {
1092 			xfs_btree_mark_sick(cur);
1093 			error = -EFSCORRUPTED;
1094 			goto out_error;
1095 		}
1096 		trace_xfs_rmap_find_right_neighbor_result(cur,
1097 				gtrec.rm_startblock, gtrec.rm_blockcount,
1098 				gtrec.rm_owner, gtrec.rm_offset,
1099 				gtrec.rm_flags);
1100 		if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
1101 			have_gt = 0;
1102 	}
1103 
1104 	/*
1105 	 * Note: cursor currently points one record to the right of ltrec, even
1106 	 * if there is no record in the tree to the right.
1107 	 */
1108 	if (have_lt &&
1109 	    ltrec.rm_startblock + ltrec.rm_blockcount == bno &&
1110 	    (ignore_off || ltrec.rm_offset + ltrec.rm_blockcount == offset)) {
1111 		/*
1112 		 * left edge contiguous, merge into left record.
1113 		 *
1114 		 *       ltbno     ltlen
1115 		 * orig:   |ooooooooo|
1116 		 * adding:           |aaaaaaaaa|
1117 		 * result: |rrrrrrrrrrrrrrrrrrr|
1118 		 *                  bno       len
1119 		 */
1120 		ltrec.rm_blockcount += len;
1121 		if (have_gt &&
1122 		    bno + len == gtrec.rm_startblock &&
1123 		    (ignore_off || offset + len == gtrec.rm_offset) &&
1124 		    (unsigned long)ltrec.rm_blockcount + len +
1125 				gtrec.rm_blockcount <= XFS_RMAP_LEN_MAX) {
1126 			/*
1127 			 * right edge also contiguous, delete right record
1128 			 * and merge into left record.
1129 			 *
1130 			 *       ltbno     ltlen    gtbno     gtlen
1131 			 * orig:   |ooooooooo|         |ooooooooo|
1132 			 * adding:           |aaaaaaaaa|
1133 			 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
1134 			 */
1135 			ltrec.rm_blockcount += gtrec.rm_blockcount;
1136 			trace_xfs_rmap_delete(cur, gtrec.rm_startblock,
1137 					gtrec.rm_blockcount, gtrec.rm_owner,
1138 					gtrec.rm_offset, gtrec.rm_flags);
1139 			error = xfs_btree_delete(cur, &i);
1140 			if (error)
1141 				goto out_error;
1142 			if (XFS_IS_CORRUPT(mp, i != 1)) {
1143 				xfs_btree_mark_sick(cur);
1144 				error = -EFSCORRUPTED;
1145 				goto out_error;
1146 			}
1147 		}
1148 
1149 		/* point the cursor back to the left record and update */
1150 		error = xfs_btree_decrement(cur, 0, &have_gt);
1151 		if (error)
1152 			goto out_error;
1153 		error = xfs_rmap_update(cur, &ltrec);
1154 		if (error)
1155 			goto out_error;
1156 	} else if (have_gt &&
1157 		   bno + len == gtrec.rm_startblock &&
1158 		   (ignore_off || offset + len == gtrec.rm_offset)) {
1159 		/*
1160 		 * right edge contiguous, merge into right record.
1161 		 *
1162 		 *                 gtbno     gtlen
1163 		 * Orig:             |ooooooooo|
1164 		 * adding: |aaaaaaaaa|
1165 		 * Result: |rrrrrrrrrrrrrrrrrrr|
1166 		 *        bno       len
1167 		 */
1168 		gtrec.rm_startblock = bno;
1169 		gtrec.rm_blockcount += len;
1170 		if (!ignore_off)
1171 			gtrec.rm_offset = offset;
1172 		error = xfs_rmap_update(cur, &gtrec);
1173 		if (error)
1174 			goto out_error;
1175 	} else {
1176 		/*
1177 		 * no contiguous edge with identical owner, insert
1178 		 * new record at current cursor position.
1179 		 */
1180 		cur->bc_rec.r.rm_startblock = bno;
1181 		cur->bc_rec.r.rm_blockcount = len;
1182 		cur->bc_rec.r.rm_owner = owner;
1183 		cur->bc_rec.r.rm_offset = offset;
1184 		cur->bc_rec.r.rm_flags = flags;
1185 		trace_xfs_rmap_insert(cur, bno, len, owner, offset, flags);
1186 		error = xfs_btree_insert(cur, &i);
1187 		if (error)
1188 			goto out_error;
1189 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1190 			xfs_btree_mark_sick(cur);
1191 			error = -EFSCORRUPTED;
1192 			goto out_error;
1193 		}
1194 	}
1195 
1196 	trace_xfs_rmap_map_done(cur, bno, len, unwritten, oinfo);
1197 out_error:
1198 	if (error)
1199 		trace_xfs_rmap_map_error(cur, error, _RET_IP_);
1200 	return error;
1201 }
1202 
1203 /*
1204  * Add a reference to an extent in the rmap btree.
1205  */
1206 int
xfs_rmap_alloc(struct xfs_trans * tp,struct xfs_buf * agbp,struct xfs_perag * pag,xfs_agblock_t bno,xfs_extlen_t len,const struct xfs_owner_info * oinfo)1207 xfs_rmap_alloc(
1208 	struct xfs_trans		*tp,
1209 	struct xfs_buf			*agbp,
1210 	struct xfs_perag		*pag,
1211 	xfs_agblock_t			bno,
1212 	xfs_extlen_t			len,
1213 	const struct xfs_owner_info	*oinfo)
1214 {
1215 	struct xfs_mount		*mp = tp->t_mountp;
1216 	struct xfs_btree_cur		*cur;
1217 	int				error;
1218 
1219 	if (!xfs_has_rmapbt(mp))
1220 		return 0;
1221 
1222 	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
1223 	xfs_rmap_update_hook(tp, pag_group(pag), XFS_RMAP_MAP, bno, len, false,
1224 			oinfo);
1225 	error = xfs_rmap_map(cur, bno, len, false, oinfo);
1226 
1227 	xfs_btree_del_cursor(cur, error);
1228 	return error;
1229 }
1230 
1231 #define RMAP_LEFT_CONTIG	(1 << 0)
1232 #define RMAP_RIGHT_CONTIG	(1 << 1)
1233 #define RMAP_LEFT_FILLING	(1 << 2)
1234 #define RMAP_RIGHT_FILLING	(1 << 3)
1235 #define RMAP_LEFT_VALID		(1 << 6)
1236 #define RMAP_RIGHT_VALID	(1 << 7)
1237 
1238 #define LEFT		r[0]
1239 #define RIGHT		r[1]
1240 #define PREV		r[2]
1241 #define NEW		r[3]
1242 
1243 /*
1244  * Convert an unwritten extent to a real extent or vice versa.
1245  * Does not handle overlapping extents.
1246  */
1247 STATIC int
xfs_rmap_convert(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,bool unwritten,const struct xfs_owner_info * oinfo)1248 xfs_rmap_convert(
1249 	struct xfs_btree_cur		*cur,
1250 	xfs_agblock_t			bno,
1251 	xfs_extlen_t			len,
1252 	bool				unwritten,
1253 	const struct xfs_owner_info	*oinfo)
1254 {
1255 	struct xfs_mount		*mp = cur->bc_mp;
1256 	struct xfs_rmap_irec		r[4];	/* neighbor extent entries */
1257 						/* left is 0, right is 1, */
1258 						/* prev is 2, new is 3 */
1259 	uint64_t		owner;
1260 	uint64_t		offset;
1261 	uint64_t		new_endoff;
1262 	unsigned int		oldext;
1263 	unsigned int		newext;
1264 	unsigned int		flags = 0;
1265 	int			i;
1266 	int			state = 0;
1267 	int			error;
1268 
1269 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
1270 	ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) ||
1271 			(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
1272 	oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
1273 	new_endoff = offset + len;
1274 	trace_xfs_rmap_convert(cur, bno, len, unwritten, oinfo);
1275 
1276 	/*
1277 	 * For the initial lookup, look for an exact match or the left-adjacent
1278 	 * record for our insertion point. This will also give us the record for
1279 	 * start block contiguity tests.
1280 	 */
1281 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, &PREV, &i);
1282 	if (error)
1283 		goto done;
1284 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1285 		xfs_btree_mark_sick(cur);
1286 		error = -EFSCORRUPTED;
1287 		goto done;
1288 	}
1289 
1290 	trace_xfs_rmap_lookup_le_range_result(cur, PREV.rm_startblock,
1291 			PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset,
1292 			PREV.rm_flags);
1293 
1294 	ASSERT(PREV.rm_offset <= offset);
1295 	ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
1296 	ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext);
1297 	newext = ~oldext & XFS_RMAP_UNWRITTEN;
1298 
1299 	/*
1300 	 * Set flags determining what part of the previous oldext allocation
1301 	 * extent is being replaced by a newext allocation.
1302 	 */
1303 	if (PREV.rm_offset == offset)
1304 		state |= RMAP_LEFT_FILLING;
1305 	if (PREV.rm_offset + PREV.rm_blockcount == new_endoff)
1306 		state |= RMAP_RIGHT_FILLING;
1307 
1308 	/*
1309 	 * Decrement the cursor to see if we have a left-adjacent record to our
1310 	 * insertion point. This will give us the record for end block
1311 	 * contiguity tests.
1312 	 */
1313 	error = xfs_btree_decrement(cur, 0, &i);
1314 	if (error)
1315 		goto done;
1316 	if (i) {
1317 		state |= RMAP_LEFT_VALID;
1318 		error = xfs_rmap_get_rec(cur, &LEFT, &i);
1319 		if (error)
1320 			goto done;
1321 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1322 			xfs_btree_mark_sick(cur);
1323 			error = -EFSCORRUPTED;
1324 			goto done;
1325 		}
1326 		if (XFS_IS_CORRUPT(mp,
1327 				   LEFT.rm_startblock + LEFT.rm_blockcount >
1328 				   bno)) {
1329 			xfs_btree_mark_sick(cur);
1330 			error = -EFSCORRUPTED;
1331 			goto done;
1332 		}
1333 		trace_xfs_rmap_find_left_neighbor_result(cur,
1334 				LEFT.rm_startblock, LEFT.rm_blockcount,
1335 				LEFT.rm_owner, LEFT.rm_offset, LEFT.rm_flags);
1336 		if (LEFT.rm_startblock + LEFT.rm_blockcount == bno &&
1337 		    LEFT.rm_offset + LEFT.rm_blockcount == offset &&
1338 		    xfs_rmap_is_mergeable(&LEFT, owner, newext))
1339 			state |= RMAP_LEFT_CONTIG;
1340 	}
1341 
1342 	/*
1343 	 * Increment the cursor to see if we have a right-adjacent record to our
1344 	 * insertion point. This will give us the record for end block
1345 	 * contiguity tests.
1346 	 */
1347 	error = xfs_btree_increment(cur, 0, &i);
1348 	if (error)
1349 		goto done;
1350 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1351 		xfs_btree_mark_sick(cur);
1352 		error = -EFSCORRUPTED;
1353 		goto done;
1354 	}
1355 	error = xfs_btree_increment(cur, 0, &i);
1356 	if (error)
1357 		goto done;
1358 	if (i) {
1359 		state |= RMAP_RIGHT_VALID;
1360 		error = xfs_rmap_get_rec(cur, &RIGHT, &i);
1361 		if (error)
1362 			goto done;
1363 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1364 			xfs_btree_mark_sick(cur);
1365 			error = -EFSCORRUPTED;
1366 			goto done;
1367 		}
1368 		if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) {
1369 			xfs_btree_mark_sick(cur);
1370 			error = -EFSCORRUPTED;
1371 			goto done;
1372 		}
1373 		trace_xfs_rmap_find_right_neighbor_result(cur,
1374 				RIGHT.rm_startblock, RIGHT.rm_blockcount,
1375 				RIGHT.rm_owner, RIGHT.rm_offset,
1376 				RIGHT.rm_flags);
1377 		if (bno + len == RIGHT.rm_startblock &&
1378 		    offset + len == RIGHT.rm_offset &&
1379 		    xfs_rmap_is_mergeable(&RIGHT, owner, newext))
1380 			state |= RMAP_RIGHT_CONTIG;
1381 	}
1382 
1383 	/* check that left + prev + right is not too long */
1384 	if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1385 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) ==
1386 	    (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1387 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) &&
1388 	    (unsigned long)LEFT.rm_blockcount + len +
1389 	     RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
1390 		state &= ~RMAP_RIGHT_CONTIG;
1391 
1392 	trace_xfs_rmap_convert_state(cur, state, _RET_IP_);
1393 
1394 	/* reset the cursor back to PREV */
1395 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i);
1396 	if (error)
1397 		goto done;
1398 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1399 		xfs_btree_mark_sick(cur);
1400 		error = -EFSCORRUPTED;
1401 		goto done;
1402 	}
1403 
1404 	/*
1405 	 * Switch out based on the FILLING and CONTIG state bits.
1406 	 */
1407 	switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1408 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) {
1409 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1410 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1411 		/*
1412 		 * Setting all of a previous oldext extent to newext.
1413 		 * The left and right neighbors are both contiguous with new.
1414 		 */
1415 		error = xfs_btree_increment(cur, 0, &i);
1416 		if (error)
1417 			goto done;
1418 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1419 			xfs_btree_mark_sick(cur);
1420 			error = -EFSCORRUPTED;
1421 			goto done;
1422 		}
1423 		trace_xfs_rmap_delete(cur, RIGHT.rm_startblock,
1424 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1425 				RIGHT.rm_offset, RIGHT.rm_flags);
1426 		error = xfs_btree_delete(cur, &i);
1427 		if (error)
1428 			goto done;
1429 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1430 			xfs_btree_mark_sick(cur);
1431 			error = -EFSCORRUPTED;
1432 			goto done;
1433 		}
1434 		error = xfs_btree_decrement(cur, 0, &i);
1435 		if (error)
1436 			goto done;
1437 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1438 			xfs_btree_mark_sick(cur);
1439 			error = -EFSCORRUPTED;
1440 			goto done;
1441 		}
1442 		trace_xfs_rmap_delete(cur, PREV.rm_startblock,
1443 				PREV.rm_blockcount, PREV.rm_owner,
1444 				PREV.rm_offset, PREV.rm_flags);
1445 		error = xfs_btree_delete(cur, &i);
1446 		if (error)
1447 			goto done;
1448 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1449 			xfs_btree_mark_sick(cur);
1450 			error = -EFSCORRUPTED;
1451 			goto done;
1452 		}
1453 		error = xfs_btree_decrement(cur, 0, &i);
1454 		if (error)
1455 			goto done;
1456 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1457 			xfs_btree_mark_sick(cur);
1458 			error = -EFSCORRUPTED;
1459 			goto done;
1460 		}
1461 		NEW = LEFT;
1462 		NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
1463 		error = xfs_rmap_update(cur, &NEW);
1464 		if (error)
1465 			goto done;
1466 		break;
1467 
1468 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1469 		/*
1470 		 * Setting all of a previous oldext extent to newext.
1471 		 * The left neighbor is contiguous, the right is not.
1472 		 */
1473 		trace_xfs_rmap_delete(cur, PREV.rm_startblock,
1474 				PREV.rm_blockcount, PREV.rm_owner,
1475 				PREV.rm_offset, PREV.rm_flags);
1476 		error = xfs_btree_delete(cur, &i);
1477 		if (error)
1478 			goto done;
1479 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1480 			xfs_btree_mark_sick(cur);
1481 			error = -EFSCORRUPTED;
1482 			goto done;
1483 		}
1484 		error = xfs_btree_decrement(cur, 0, &i);
1485 		if (error)
1486 			goto done;
1487 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1488 			xfs_btree_mark_sick(cur);
1489 			error = -EFSCORRUPTED;
1490 			goto done;
1491 		}
1492 		NEW = LEFT;
1493 		NEW.rm_blockcount += PREV.rm_blockcount;
1494 		error = xfs_rmap_update(cur, &NEW);
1495 		if (error)
1496 			goto done;
1497 		break;
1498 
1499 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1500 		/*
1501 		 * Setting all of a previous oldext extent to newext.
1502 		 * The right neighbor is contiguous, the left is not.
1503 		 */
1504 		error = xfs_btree_increment(cur, 0, &i);
1505 		if (error)
1506 			goto done;
1507 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1508 			xfs_btree_mark_sick(cur);
1509 			error = -EFSCORRUPTED;
1510 			goto done;
1511 		}
1512 		trace_xfs_rmap_delete(cur, RIGHT.rm_startblock,
1513 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1514 				RIGHT.rm_offset, RIGHT.rm_flags);
1515 		error = xfs_btree_delete(cur, &i);
1516 		if (error)
1517 			goto done;
1518 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1519 			xfs_btree_mark_sick(cur);
1520 			error = -EFSCORRUPTED;
1521 			goto done;
1522 		}
1523 		error = xfs_btree_decrement(cur, 0, &i);
1524 		if (error)
1525 			goto done;
1526 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1527 			xfs_btree_mark_sick(cur);
1528 			error = -EFSCORRUPTED;
1529 			goto done;
1530 		}
1531 		NEW = PREV;
1532 		NEW.rm_blockcount = len + RIGHT.rm_blockcount;
1533 		NEW.rm_flags = newext;
1534 		error = xfs_rmap_update(cur, &NEW);
1535 		if (error)
1536 			goto done;
1537 		break;
1538 
1539 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING:
1540 		/*
1541 		 * Setting all of a previous oldext extent to newext.
1542 		 * Neither the left nor right neighbors are contiguous with
1543 		 * the new one.
1544 		 */
1545 		NEW = PREV;
1546 		NEW.rm_flags = newext;
1547 		error = xfs_rmap_update(cur, &NEW);
1548 		if (error)
1549 			goto done;
1550 		break;
1551 
1552 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG:
1553 		/*
1554 		 * Setting the first part of a previous oldext extent to newext.
1555 		 * The left neighbor is contiguous.
1556 		 */
1557 		NEW = PREV;
1558 		NEW.rm_offset += len;
1559 		NEW.rm_startblock += len;
1560 		NEW.rm_blockcount -= len;
1561 		error = xfs_rmap_update(cur, &NEW);
1562 		if (error)
1563 			goto done;
1564 		error = xfs_btree_decrement(cur, 0, &i);
1565 		if (error)
1566 			goto done;
1567 		NEW = LEFT;
1568 		NEW.rm_blockcount += len;
1569 		error = xfs_rmap_update(cur, &NEW);
1570 		if (error)
1571 			goto done;
1572 		break;
1573 
1574 	case RMAP_LEFT_FILLING:
1575 		/*
1576 		 * Setting the first part of a previous oldext extent to newext.
1577 		 * The left neighbor is not contiguous.
1578 		 */
1579 		NEW = PREV;
1580 		NEW.rm_startblock += len;
1581 		NEW.rm_offset += len;
1582 		NEW.rm_blockcount -= len;
1583 		error = xfs_rmap_update(cur, &NEW);
1584 		if (error)
1585 			goto done;
1586 		NEW.rm_startblock = bno;
1587 		NEW.rm_owner = owner;
1588 		NEW.rm_offset = offset;
1589 		NEW.rm_blockcount = len;
1590 		NEW.rm_flags = newext;
1591 		cur->bc_rec.r = NEW;
1592 		trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext);
1593 		error = xfs_btree_insert(cur, &i);
1594 		if (error)
1595 			goto done;
1596 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1597 			xfs_btree_mark_sick(cur);
1598 			error = -EFSCORRUPTED;
1599 			goto done;
1600 		}
1601 		break;
1602 
1603 	case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1604 		/*
1605 		 * Setting the last part of a previous oldext extent to newext.
1606 		 * The right neighbor is contiguous with the new allocation.
1607 		 */
1608 		NEW = PREV;
1609 		NEW.rm_blockcount -= len;
1610 		error = xfs_rmap_update(cur, &NEW);
1611 		if (error)
1612 			goto done;
1613 		error = xfs_btree_increment(cur, 0, &i);
1614 		if (error)
1615 			goto done;
1616 		NEW = RIGHT;
1617 		NEW.rm_offset = offset;
1618 		NEW.rm_startblock = bno;
1619 		NEW.rm_blockcount += len;
1620 		error = xfs_rmap_update(cur, &NEW);
1621 		if (error)
1622 			goto done;
1623 		break;
1624 
1625 	case RMAP_RIGHT_FILLING:
1626 		/*
1627 		 * Setting the last part of a previous oldext extent to newext.
1628 		 * The right neighbor is not contiguous.
1629 		 */
1630 		NEW = PREV;
1631 		NEW.rm_blockcount -= len;
1632 		error = xfs_rmap_update(cur, &NEW);
1633 		if (error)
1634 			goto done;
1635 		error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
1636 				oldext, &i);
1637 		if (error)
1638 			goto done;
1639 		if (XFS_IS_CORRUPT(mp, i != 0)) {
1640 			xfs_btree_mark_sick(cur);
1641 			error = -EFSCORRUPTED;
1642 			goto done;
1643 		}
1644 		NEW.rm_startblock = bno;
1645 		NEW.rm_owner = owner;
1646 		NEW.rm_offset = offset;
1647 		NEW.rm_blockcount = len;
1648 		NEW.rm_flags = newext;
1649 		cur->bc_rec.r = NEW;
1650 		trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext);
1651 		error = xfs_btree_insert(cur, &i);
1652 		if (error)
1653 			goto done;
1654 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1655 			xfs_btree_mark_sick(cur);
1656 			error = -EFSCORRUPTED;
1657 			goto done;
1658 		}
1659 		break;
1660 
1661 	case 0:
1662 		/*
1663 		 * Setting the middle part of a previous oldext extent to
1664 		 * newext.  Contiguity is impossible here.
1665 		 * One extent becomes three extents.
1666 		 */
1667 		/* new right extent - oldext */
1668 		NEW.rm_startblock = bno + len;
1669 		NEW.rm_owner = owner;
1670 		NEW.rm_offset = new_endoff;
1671 		NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount -
1672 				new_endoff;
1673 		NEW.rm_flags = PREV.rm_flags;
1674 		error = xfs_rmap_update(cur, &NEW);
1675 		if (error)
1676 			goto done;
1677 		/* new left extent - oldext */
1678 		NEW = PREV;
1679 		NEW.rm_blockcount = offset - PREV.rm_offset;
1680 		cur->bc_rec.r = NEW;
1681 		trace_xfs_rmap_insert(cur, NEW.rm_startblock,
1682 				NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset,
1683 				NEW.rm_flags);
1684 		error = xfs_btree_insert(cur, &i);
1685 		if (error)
1686 			goto done;
1687 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1688 			xfs_btree_mark_sick(cur);
1689 			error = -EFSCORRUPTED;
1690 			goto done;
1691 		}
1692 		/*
1693 		 * Reset the cursor to the position of the new extent
1694 		 * we are about to insert as we can't trust it after
1695 		 * the previous insert.
1696 		 */
1697 		error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
1698 				oldext, &i);
1699 		if (error)
1700 			goto done;
1701 		if (XFS_IS_CORRUPT(mp, i != 0)) {
1702 			xfs_btree_mark_sick(cur);
1703 			error = -EFSCORRUPTED;
1704 			goto done;
1705 		}
1706 		/* new middle extent - newext */
1707 		cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN;
1708 		cur->bc_rec.r.rm_flags |= newext;
1709 		trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext);
1710 		error = xfs_btree_insert(cur, &i);
1711 		if (error)
1712 			goto done;
1713 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1714 			xfs_btree_mark_sick(cur);
1715 			error = -EFSCORRUPTED;
1716 			goto done;
1717 		}
1718 		break;
1719 
1720 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1721 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1722 	case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG:
1723 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1724 	case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1725 	case RMAP_LEFT_CONTIG:
1726 	case RMAP_RIGHT_CONTIG:
1727 		/*
1728 		 * These cases are all impossible.
1729 		 */
1730 		ASSERT(0);
1731 	}
1732 
1733 	trace_xfs_rmap_convert_done(cur, bno, len, unwritten, oinfo);
1734 done:
1735 	if (error)
1736 		trace_xfs_rmap_convert_error(cur, error, _RET_IP_);
1737 	return error;
1738 }
1739 
1740 /*
1741  * Convert an unwritten extent to a real extent or vice versa.  If there is no
1742  * possibility of overlapping extents, delegate to the simpler convert
1743  * function.
1744  */
1745 STATIC int
xfs_rmap_convert_shared(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,bool unwritten,const struct xfs_owner_info * oinfo)1746 xfs_rmap_convert_shared(
1747 	struct xfs_btree_cur		*cur,
1748 	xfs_agblock_t			bno,
1749 	xfs_extlen_t			len,
1750 	bool				unwritten,
1751 	const struct xfs_owner_info	*oinfo)
1752 {
1753 	struct xfs_mount		*mp = cur->bc_mp;
1754 	struct xfs_rmap_irec		r[4];	/* neighbor extent entries */
1755 						/* left is 0, right is 1, */
1756 						/* prev is 2, new is 3 */
1757 	uint64_t		owner;
1758 	uint64_t		offset;
1759 	uint64_t		new_endoff;
1760 	unsigned int		oldext;
1761 	unsigned int		newext;
1762 	unsigned int		flags = 0;
1763 	int			i;
1764 	int			state = 0;
1765 	int			error;
1766 
1767 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
1768 	ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) ||
1769 			(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
1770 	oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
1771 	new_endoff = offset + len;
1772 	trace_xfs_rmap_convert(cur, bno, len, unwritten, oinfo);
1773 
1774 	/*
1775 	 * For the initial lookup, look for and exact match or the left-adjacent
1776 	 * record for our insertion point. This will also give us the record for
1777 	 * start block contiguity tests.
1778 	 */
1779 	error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext,
1780 			&PREV, &i);
1781 	if (error)
1782 		goto done;
1783 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1784 		xfs_btree_mark_sick(cur);
1785 		error = -EFSCORRUPTED;
1786 		goto done;
1787 	}
1788 
1789 	ASSERT(PREV.rm_offset <= offset);
1790 	ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
1791 	ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext);
1792 	newext = ~oldext & XFS_RMAP_UNWRITTEN;
1793 
1794 	/*
1795 	 * Set flags determining what part of the previous oldext allocation
1796 	 * extent is being replaced by a newext allocation.
1797 	 */
1798 	if (PREV.rm_offset == offset)
1799 		state |= RMAP_LEFT_FILLING;
1800 	if (PREV.rm_offset + PREV.rm_blockcount == new_endoff)
1801 		state |= RMAP_RIGHT_FILLING;
1802 
1803 	/* Is there a left record that abuts our range? */
1804 	error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, newext,
1805 			&LEFT, &i);
1806 	if (error)
1807 		goto done;
1808 	if (i) {
1809 		state |= RMAP_LEFT_VALID;
1810 		if (XFS_IS_CORRUPT(mp,
1811 				   LEFT.rm_startblock + LEFT.rm_blockcount >
1812 				   bno)) {
1813 			xfs_btree_mark_sick(cur);
1814 			error = -EFSCORRUPTED;
1815 			goto done;
1816 		}
1817 		if (xfs_rmap_is_mergeable(&LEFT, owner, newext))
1818 			state |= RMAP_LEFT_CONTIG;
1819 	}
1820 
1821 	/* Is there a right record that abuts our range? */
1822 	error = xfs_rmap_lookup_eq(cur, bno + len, len, owner, offset + len,
1823 			newext, &i);
1824 	if (error)
1825 		goto done;
1826 	if (i) {
1827 		state |= RMAP_RIGHT_VALID;
1828 		error = xfs_rmap_get_rec(cur, &RIGHT, &i);
1829 		if (error)
1830 			goto done;
1831 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1832 			xfs_btree_mark_sick(cur);
1833 			error = -EFSCORRUPTED;
1834 			goto done;
1835 		}
1836 		if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) {
1837 			xfs_btree_mark_sick(cur);
1838 			error = -EFSCORRUPTED;
1839 			goto done;
1840 		}
1841 		trace_xfs_rmap_find_right_neighbor_result(cur,
1842 				RIGHT.rm_startblock, RIGHT.rm_blockcount,
1843 				RIGHT.rm_owner, RIGHT.rm_offset,
1844 				RIGHT.rm_flags);
1845 		if (xfs_rmap_is_mergeable(&RIGHT, owner, newext))
1846 			state |= RMAP_RIGHT_CONTIG;
1847 	}
1848 
1849 	/* check that left + prev + right is not too long */
1850 	if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1851 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) ==
1852 	    (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1853 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) &&
1854 	    (unsigned long)LEFT.rm_blockcount + len +
1855 	     RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
1856 		state &= ~RMAP_RIGHT_CONTIG;
1857 
1858 	trace_xfs_rmap_convert_state(cur, state, _RET_IP_);
1859 	/*
1860 	 * Switch out based on the FILLING and CONTIG state bits.
1861 	 */
1862 	switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1863 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) {
1864 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1865 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1866 		/*
1867 		 * Setting all of a previous oldext extent to newext.
1868 		 * The left and right neighbors are both contiguous with new.
1869 		 */
1870 		error = xfs_rmap_delete(cur, RIGHT.rm_startblock,
1871 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1872 				RIGHT.rm_offset, RIGHT.rm_flags);
1873 		if (error)
1874 			goto done;
1875 		error = xfs_rmap_delete(cur, PREV.rm_startblock,
1876 				PREV.rm_blockcount, PREV.rm_owner,
1877 				PREV.rm_offset, PREV.rm_flags);
1878 		if (error)
1879 			goto done;
1880 		NEW = LEFT;
1881 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1882 				NEW.rm_blockcount, NEW.rm_owner,
1883 				NEW.rm_offset, NEW.rm_flags, &i);
1884 		if (error)
1885 			goto done;
1886 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1887 			xfs_btree_mark_sick(cur);
1888 			error = -EFSCORRUPTED;
1889 			goto done;
1890 		}
1891 		NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
1892 		error = xfs_rmap_update(cur, &NEW);
1893 		if (error)
1894 			goto done;
1895 		break;
1896 
1897 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1898 		/*
1899 		 * Setting all of a previous oldext extent to newext.
1900 		 * The left neighbor is contiguous, the right is not.
1901 		 */
1902 		error = xfs_rmap_delete(cur, PREV.rm_startblock,
1903 				PREV.rm_blockcount, PREV.rm_owner,
1904 				PREV.rm_offset, PREV.rm_flags);
1905 		if (error)
1906 			goto done;
1907 		NEW = LEFT;
1908 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1909 				NEW.rm_blockcount, NEW.rm_owner,
1910 				NEW.rm_offset, NEW.rm_flags, &i);
1911 		if (error)
1912 			goto done;
1913 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1914 			xfs_btree_mark_sick(cur);
1915 			error = -EFSCORRUPTED;
1916 			goto done;
1917 		}
1918 		NEW.rm_blockcount += PREV.rm_blockcount;
1919 		error = xfs_rmap_update(cur, &NEW);
1920 		if (error)
1921 			goto done;
1922 		break;
1923 
1924 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1925 		/*
1926 		 * Setting all of a previous oldext extent to newext.
1927 		 * The right neighbor is contiguous, the left is not.
1928 		 */
1929 		error = xfs_rmap_delete(cur, RIGHT.rm_startblock,
1930 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1931 				RIGHT.rm_offset, RIGHT.rm_flags);
1932 		if (error)
1933 			goto done;
1934 		NEW = PREV;
1935 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1936 				NEW.rm_blockcount, NEW.rm_owner,
1937 				NEW.rm_offset, NEW.rm_flags, &i);
1938 		if (error)
1939 			goto done;
1940 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1941 			xfs_btree_mark_sick(cur);
1942 			error = -EFSCORRUPTED;
1943 			goto done;
1944 		}
1945 		NEW.rm_blockcount += RIGHT.rm_blockcount;
1946 		NEW.rm_flags = RIGHT.rm_flags;
1947 		error = xfs_rmap_update(cur, &NEW);
1948 		if (error)
1949 			goto done;
1950 		break;
1951 
1952 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING:
1953 		/*
1954 		 * Setting all of a previous oldext extent to newext.
1955 		 * Neither the left nor right neighbors are contiguous with
1956 		 * the new one.
1957 		 */
1958 		NEW = PREV;
1959 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1960 				NEW.rm_blockcount, NEW.rm_owner,
1961 				NEW.rm_offset, NEW.rm_flags, &i);
1962 		if (error)
1963 			goto done;
1964 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1965 			xfs_btree_mark_sick(cur);
1966 			error = -EFSCORRUPTED;
1967 			goto done;
1968 		}
1969 		NEW.rm_flags = newext;
1970 		error = xfs_rmap_update(cur, &NEW);
1971 		if (error)
1972 			goto done;
1973 		break;
1974 
1975 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG:
1976 		/*
1977 		 * Setting the first part of a previous oldext extent to newext.
1978 		 * The left neighbor is contiguous.
1979 		 */
1980 		NEW = PREV;
1981 		error = xfs_rmap_delete(cur, NEW.rm_startblock,
1982 				NEW.rm_blockcount, NEW.rm_owner,
1983 				NEW.rm_offset, NEW.rm_flags);
1984 		if (error)
1985 			goto done;
1986 		NEW.rm_offset += len;
1987 		NEW.rm_startblock += len;
1988 		NEW.rm_blockcount -= len;
1989 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1990 				NEW.rm_blockcount, NEW.rm_owner,
1991 				NEW.rm_offset, NEW.rm_flags);
1992 		if (error)
1993 			goto done;
1994 		NEW = LEFT;
1995 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1996 				NEW.rm_blockcount, NEW.rm_owner,
1997 				NEW.rm_offset, NEW.rm_flags, &i);
1998 		if (error)
1999 			goto done;
2000 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2001 			xfs_btree_mark_sick(cur);
2002 			error = -EFSCORRUPTED;
2003 			goto done;
2004 		}
2005 		NEW.rm_blockcount += len;
2006 		error = xfs_rmap_update(cur, &NEW);
2007 		if (error)
2008 			goto done;
2009 		break;
2010 
2011 	case RMAP_LEFT_FILLING:
2012 		/*
2013 		 * Setting the first part of a previous oldext extent to newext.
2014 		 * The left neighbor is not contiguous.
2015 		 */
2016 		NEW = PREV;
2017 		error = xfs_rmap_delete(cur, NEW.rm_startblock,
2018 				NEW.rm_blockcount, NEW.rm_owner,
2019 				NEW.rm_offset, NEW.rm_flags);
2020 		if (error)
2021 			goto done;
2022 		NEW.rm_offset += len;
2023 		NEW.rm_startblock += len;
2024 		NEW.rm_blockcount -= len;
2025 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
2026 				NEW.rm_blockcount, NEW.rm_owner,
2027 				NEW.rm_offset, NEW.rm_flags);
2028 		if (error)
2029 			goto done;
2030 		error = xfs_rmap_insert(cur, bno, len, owner, offset, newext);
2031 		if (error)
2032 			goto done;
2033 		break;
2034 
2035 	case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
2036 		/*
2037 		 * Setting the last part of a previous oldext extent to newext.
2038 		 * The right neighbor is contiguous with the new allocation.
2039 		 */
2040 		NEW = PREV;
2041 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
2042 				NEW.rm_blockcount, NEW.rm_owner,
2043 				NEW.rm_offset, NEW.rm_flags, &i);
2044 		if (error)
2045 			goto done;
2046 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2047 			xfs_btree_mark_sick(cur);
2048 			error = -EFSCORRUPTED;
2049 			goto done;
2050 		}
2051 		NEW.rm_blockcount = offset - NEW.rm_offset;
2052 		error = xfs_rmap_update(cur, &NEW);
2053 		if (error)
2054 			goto done;
2055 		NEW = RIGHT;
2056 		error = xfs_rmap_delete(cur, NEW.rm_startblock,
2057 				NEW.rm_blockcount, NEW.rm_owner,
2058 				NEW.rm_offset, NEW.rm_flags);
2059 		if (error)
2060 			goto done;
2061 		NEW.rm_offset = offset;
2062 		NEW.rm_startblock = bno;
2063 		NEW.rm_blockcount += len;
2064 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
2065 				NEW.rm_blockcount, NEW.rm_owner,
2066 				NEW.rm_offset, NEW.rm_flags);
2067 		if (error)
2068 			goto done;
2069 		break;
2070 
2071 	case RMAP_RIGHT_FILLING:
2072 		/*
2073 		 * Setting the last part of a previous oldext extent to newext.
2074 		 * The right neighbor is not contiguous.
2075 		 */
2076 		NEW = PREV;
2077 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
2078 				NEW.rm_blockcount, NEW.rm_owner,
2079 				NEW.rm_offset, NEW.rm_flags, &i);
2080 		if (error)
2081 			goto done;
2082 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2083 			xfs_btree_mark_sick(cur);
2084 			error = -EFSCORRUPTED;
2085 			goto done;
2086 		}
2087 		NEW.rm_blockcount -= len;
2088 		error = xfs_rmap_update(cur, &NEW);
2089 		if (error)
2090 			goto done;
2091 		error = xfs_rmap_insert(cur, bno, len, owner, offset, newext);
2092 		if (error)
2093 			goto done;
2094 		break;
2095 
2096 	case 0:
2097 		/*
2098 		 * Setting the middle part of a previous oldext extent to
2099 		 * newext.  Contiguity is impossible here.
2100 		 * One extent becomes three extents.
2101 		 */
2102 		/* new right extent - oldext */
2103 		NEW.rm_startblock = bno + len;
2104 		NEW.rm_owner = owner;
2105 		NEW.rm_offset = new_endoff;
2106 		NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount -
2107 				new_endoff;
2108 		NEW.rm_flags = PREV.rm_flags;
2109 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
2110 				NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset,
2111 				NEW.rm_flags);
2112 		if (error)
2113 			goto done;
2114 		/* new left extent - oldext */
2115 		NEW = PREV;
2116 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
2117 				NEW.rm_blockcount, NEW.rm_owner,
2118 				NEW.rm_offset, NEW.rm_flags, &i);
2119 		if (error)
2120 			goto done;
2121 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2122 			xfs_btree_mark_sick(cur);
2123 			error = -EFSCORRUPTED;
2124 			goto done;
2125 		}
2126 		NEW.rm_blockcount = offset - NEW.rm_offset;
2127 		error = xfs_rmap_update(cur, &NEW);
2128 		if (error)
2129 			goto done;
2130 		/* new middle extent - newext */
2131 		NEW.rm_startblock = bno;
2132 		NEW.rm_blockcount = len;
2133 		NEW.rm_owner = owner;
2134 		NEW.rm_offset = offset;
2135 		NEW.rm_flags = newext;
2136 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
2137 				NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset,
2138 				NEW.rm_flags);
2139 		if (error)
2140 			goto done;
2141 		break;
2142 
2143 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
2144 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
2145 	case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG:
2146 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
2147 	case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
2148 	case RMAP_LEFT_CONTIG:
2149 	case RMAP_RIGHT_CONTIG:
2150 		/*
2151 		 * These cases are all impossible.
2152 		 */
2153 		ASSERT(0);
2154 	}
2155 
2156 	trace_xfs_rmap_convert_done(cur, bno, len, unwritten, oinfo);
2157 done:
2158 	if (error)
2159 		trace_xfs_rmap_convert_error(cur, error, _RET_IP_);
2160 	return error;
2161 }
2162 
2163 #undef	NEW
2164 #undef	LEFT
2165 #undef	RIGHT
2166 #undef	PREV
2167 
2168 /*
2169  * Find an extent in the rmap btree and unmap it.  For rmap extent types that
2170  * can overlap (data fork rmaps on reflink filesystems) we must be careful
2171  * that the prev/next records in the btree might belong to another owner.
2172  * Therefore we must use delete+insert to alter any of the key fields.
2173  *
2174  * For every other situation there can only be one owner for a given extent,
2175  * so we can call the regular _free function.
2176  */
2177 STATIC int
xfs_rmap_unmap_shared(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,bool unwritten,const struct xfs_owner_info * oinfo)2178 xfs_rmap_unmap_shared(
2179 	struct xfs_btree_cur		*cur,
2180 	xfs_agblock_t			bno,
2181 	xfs_extlen_t			len,
2182 	bool				unwritten,
2183 	const struct xfs_owner_info	*oinfo)
2184 {
2185 	struct xfs_mount		*mp = cur->bc_mp;
2186 	struct xfs_rmap_irec		ltrec;
2187 	uint64_t			ltoff;
2188 	int				error = 0;
2189 	int				i;
2190 	uint64_t			owner;
2191 	uint64_t			offset;
2192 	unsigned int			flags;
2193 
2194 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
2195 	if (unwritten)
2196 		flags |= XFS_RMAP_UNWRITTEN;
2197 	trace_xfs_rmap_unmap(cur, bno, len, unwritten, oinfo);
2198 
2199 	/*
2200 	 * We should always have a left record because there's a static record
2201 	 * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
2202 	 * will not ever be removed from the tree.
2203 	 */
2204 	error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
2205 			&ltrec, &i);
2206 	if (error)
2207 		goto out_error;
2208 	if (XFS_IS_CORRUPT(mp, i != 1)) {
2209 		xfs_btree_mark_sick(cur);
2210 		error = -EFSCORRUPTED;
2211 		goto out_error;
2212 	}
2213 	ltoff = ltrec.rm_offset;
2214 
2215 	/* Make sure the extent we found covers the entire freeing range. */
2216 	if (XFS_IS_CORRUPT(mp,
2217 			   ltrec.rm_startblock > bno ||
2218 			   ltrec.rm_startblock + ltrec.rm_blockcount <
2219 			   bno + len)) {
2220 		xfs_btree_mark_sick(cur);
2221 		error = -EFSCORRUPTED;
2222 		goto out_error;
2223 	}
2224 
2225 	/* Make sure the owner matches what we expect to find in the tree. */
2226 	if (XFS_IS_CORRUPT(mp, owner != ltrec.rm_owner)) {
2227 		xfs_btree_mark_sick(cur);
2228 		error = -EFSCORRUPTED;
2229 		goto out_error;
2230 	}
2231 
2232 	/* Make sure the unwritten flag matches. */
2233 	if (XFS_IS_CORRUPT(mp,
2234 			   (flags & XFS_RMAP_UNWRITTEN) !=
2235 			   (ltrec.rm_flags & XFS_RMAP_UNWRITTEN))) {
2236 		xfs_btree_mark_sick(cur);
2237 		error = -EFSCORRUPTED;
2238 		goto out_error;
2239 	}
2240 
2241 	/* Check the offset. */
2242 	if (XFS_IS_CORRUPT(mp, ltrec.rm_offset > offset)) {
2243 		xfs_btree_mark_sick(cur);
2244 		error = -EFSCORRUPTED;
2245 		goto out_error;
2246 	}
2247 	if (XFS_IS_CORRUPT(mp, offset > ltoff + ltrec.rm_blockcount)) {
2248 		xfs_btree_mark_sick(cur);
2249 		error = -EFSCORRUPTED;
2250 		goto out_error;
2251 	}
2252 
2253 	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
2254 		/* Exact match, simply remove the record from rmap tree. */
2255 		error = xfs_rmap_delete(cur, ltrec.rm_startblock,
2256 				ltrec.rm_blockcount, ltrec.rm_owner,
2257 				ltrec.rm_offset, ltrec.rm_flags);
2258 		if (error)
2259 			goto out_error;
2260 	} else if (ltrec.rm_startblock == bno) {
2261 		/*
2262 		 * Overlap left hand side of extent: move the start, trim the
2263 		 * length and update the current record.
2264 		 *
2265 		 *       ltbno                ltlen
2266 		 * Orig:    |oooooooooooooooooooo|
2267 		 * Freeing: |fffffffff|
2268 		 * Result:            |rrrrrrrrrr|
2269 		 *         bno       len
2270 		 */
2271 
2272 		/* Delete prev rmap. */
2273 		error = xfs_rmap_delete(cur, ltrec.rm_startblock,
2274 				ltrec.rm_blockcount, ltrec.rm_owner,
2275 				ltrec.rm_offset, ltrec.rm_flags);
2276 		if (error)
2277 			goto out_error;
2278 
2279 		/* Add an rmap at the new offset. */
2280 		ltrec.rm_startblock += len;
2281 		ltrec.rm_blockcount -= len;
2282 		ltrec.rm_offset += len;
2283 		error = xfs_rmap_insert(cur, ltrec.rm_startblock,
2284 				ltrec.rm_blockcount, ltrec.rm_owner,
2285 				ltrec.rm_offset, ltrec.rm_flags);
2286 		if (error)
2287 			goto out_error;
2288 	} else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
2289 		/*
2290 		 * Overlap right hand side of extent: trim the length and
2291 		 * update the current record.
2292 		 *
2293 		 *       ltbno                ltlen
2294 		 * Orig:    |oooooooooooooooooooo|
2295 		 * Freeing:            |fffffffff|
2296 		 * Result:  |rrrrrrrrrr|
2297 		 *                    bno       len
2298 		 */
2299 		error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock,
2300 				ltrec.rm_blockcount, ltrec.rm_owner,
2301 				ltrec.rm_offset, ltrec.rm_flags, &i);
2302 		if (error)
2303 			goto out_error;
2304 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2305 			xfs_btree_mark_sick(cur);
2306 			error = -EFSCORRUPTED;
2307 			goto out_error;
2308 		}
2309 		ltrec.rm_blockcount -= len;
2310 		error = xfs_rmap_update(cur, &ltrec);
2311 		if (error)
2312 			goto out_error;
2313 	} else {
2314 		/*
2315 		 * Overlap middle of extent: trim the length of the existing
2316 		 * record to the length of the new left-extent size, increment
2317 		 * the insertion position so we can insert a new record
2318 		 * containing the remaining right-extent space.
2319 		 *
2320 		 *       ltbno                ltlen
2321 		 * Orig:    |oooooooooooooooooooo|
2322 		 * Freeing:       |fffffffff|
2323 		 * Result:  |rrrrr|         |rrrr|
2324 		 *               bno       len
2325 		 */
2326 		xfs_extlen_t	orig_len = ltrec.rm_blockcount;
2327 
2328 		/* Shrink the left side of the rmap */
2329 		error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock,
2330 				ltrec.rm_blockcount, ltrec.rm_owner,
2331 				ltrec.rm_offset, ltrec.rm_flags, &i);
2332 		if (error)
2333 			goto out_error;
2334 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2335 			xfs_btree_mark_sick(cur);
2336 			error = -EFSCORRUPTED;
2337 			goto out_error;
2338 		}
2339 		ltrec.rm_blockcount = bno - ltrec.rm_startblock;
2340 		error = xfs_rmap_update(cur, &ltrec);
2341 		if (error)
2342 			goto out_error;
2343 
2344 		/* Add an rmap at the new offset */
2345 		error = xfs_rmap_insert(cur, bno + len,
2346 				orig_len - len - ltrec.rm_blockcount,
2347 				ltrec.rm_owner, offset + len,
2348 				ltrec.rm_flags);
2349 		if (error)
2350 			goto out_error;
2351 	}
2352 
2353 	trace_xfs_rmap_unmap_done(cur, bno, len, unwritten, oinfo);
2354 out_error:
2355 	if (error)
2356 		trace_xfs_rmap_unmap_error(cur, error, _RET_IP_);
2357 	return error;
2358 }
2359 
2360 /*
2361  * Find an extent in the rmap btree and map it.  For rmap extent types that
2362  * can overlap (data fork rmaps on reflink filesystems) we must be careful
2363  * that the prev/next records in the btree might belong to another owner.
2364  * Therefore we must use delete+insert to alter any of the key fields.
2365  *
2366  * For every other situation there can only be one owner for a given extent,
2367  * so we can call the regular _alloc function.
2368  */
2369 STATIC int
xfs_rmap_map_shared(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,bool unwritten,const struct xfs_owner_info * oinfo)2370 xfs_rmap_map_shared(
2371 	struct xfs_btree_cur		*cur,
2372 	xfs_agblock_t			bno,
2373 	xfs_extlen_t			len,
2374 	bool				unwritten,
2375 	const struct xfs_owner_info	*oinfo)
2376 {
2377 	struct xfs_mount		*mp = cur->bc_mp;
2378 	struct xfs_rmap_irec		ltrec;
2379 	struct xfs_rmap_irec		gtrec;
2380 	int				have_gt;
2381 	int				have_lt;
2382 	int				error = 0;
2383 	int				i;
2384 	uint64_t			owner;
2385 	uint64_t			offset;
2386 	unsigned int			flags = 0;
2387 
2388 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
2389 	if (unwritten)
2390 		flags |= XFS_RMAP_UNWRITTEN;
2391 	trace_xfs_rmap_map(cur, bno, len, unwritten, oinfo);
2392 
2393 	/* Is there a left record that abuts our range? */
2394 	error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, flags,
2395 			&ltrec, &have_lt);
2396 	if (error)
2397 		goto out_error;
2398 	if (have_lt &&
2399 	    !xfs_rmap_is_mergeable(&ltrec, owner, flags))
2400 		have_lt = 0;
2401 
2402 	/* Is there a right record that abuts our range? */
2403 	error = xfs_rmap_lookup_eq(cur, bno + len, len, owner, offset + len,
2404 			flags, &have_gt);
2405 	if (error)
2406 		goto out_error;
2407 	if (have_gt) {
2408 		error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
2409 		if (error)
2410 			goto out_error;
2411 		if (XFS_IS_CORRUPT(mp, have_gt != 1)) {
2412 			xfs_btree_mark_sick(cur);
2413 			error = -EFSCORRUPTED;
2414 			goto out_error;
2415 		}
2416 		trace_xfs_rmap_find_right_neighbor_result(cur,
2417 				gtrec.rm_startblock, gtrec.rm_blockcount,
2418 				gtrec.rm_owner, gtrec.rm_offset,
2419 				gtrec.rm_flags);
2420 
2421 		if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
2422 			have_gt = 0;
2423 	}
2424 
2425 	if (have_lt &&
2426 	    ltrec.rm_startblock + ltrec.rm_blockcount == bno &&
2427 	    ltrec.rm_offset + ltrec.rm_blockcount == offset) {
2428 		/*
2429 		 * Left edge contiguous, merge into left record.
2430 		 *
2431 		 *       ltbno     ltlen
2432 		 * orig:   |ooooooooo|
2433 		 * adding:           |aaaaaaaaa|
2434 		 * result: |rrrrrrrrrrrrrrrrrrr|
2435 		 *                  bno       len
2436 		 */
2437 		ltrec.rm_blockcount += len;
2438 		if (have_gt &&
2439 		    bno + len == gtrec.rm_startblock &&
2440 		    offset + len == gtrec.rm_offset) {
2441 			/*
2442 			 * Right edge also contiguous, delete right record
2443 			 * and merge into left record.
2444 			 *
2445 			 *       ltbno     ltlen    gtbno     gtlen
2446 			 * orig:   |ooooooooo|         |ooooooooo|
2447 			 * adding:           |aaaaaaaaa|
2448 			 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
2449 			 */
2450 			ltrec.rm_blockcount += gtrec.rm_blockcount;
2451 			error = xfs_rmap_delete(cur, gtrec.rm_startblock,
2452 					gtrec.rm_blockcount, gtrec.rm_owner,
2453 					gtrec.rm_offset, gtrec.rm_flags);
2454 			if (error)
2455 				goto out_error;
2456 		}
2457 
2458 		/* Point the cursor back to the left record and update. */
2459 		error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock,
2460 				ltrec.rm_blockcount, ltrec.rm_owner,
2461 				ltrec.rm_offset, ltrec.rm_flags, &i);
2462 		if (error)
2463 			goto out_error;
2464 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2465 			xfs_btree_mark_sick(cur);
2466 			error = -EFSCORRUPTED;
2467 			goto out_error;
2468 		}
2469 
2470 		error = xfs_rmap_update(cur, &ltrec);
2471 		if (error)
2472 			goto out_error;
2473 	} else if (have_gt &&
2474 		   bno + len == gtrec.rm_startblock &&
2475 		   offset + len == gtrec.rm_offset) {
2476 		/*
2477 		 * Right edge contiguous, merge into right record.
2478 		 *
2479 		 *                 gtbno     gtlen
2480 		 * Orig:             |ooooooooo|
2481 		 * adding: |aaaaaaaaa|
2482 		 * Result: |rrrrrrrrrrrrrrrrrrr|
2483 		 *        bno       len
2484 		 */
2485 		/* Delete the old record. */
2486 		error = xfs_rmap_delete(cur, gtrec.rm_startblock,
2487 				gtrec.rm_blockcount, gtrec.rm_owner,
2488 				gtrec.rm_offset, gtrec.rm_flags);
2489 		if (error)
2490 			goto out_error;
2491 
2492 		/* Move the start and re-add it. */
2493 		gtrec.rm_startblock = bno;
2494 		gtrec.rm_blockcount += len;
2495 		gtrec.rm_offset = offset;
2496 		error = xfs_rmap_insert(cur, gtrec.rm_startblock,
2497 				gtrec.rm_blockcount, gtrec.rm_owner,
2498 				gtrec.rm_offset, gtrec.rm_flags);
2499 		if (error)
2500 			goto out_error;
2501 	} else {
2502 		/*
2503 		 * No contiguous edge with identical owner, insert
2504 		 * new record at current cursor position.
2505 		 */
2506 		error = xfs_rmap_insert(cur, bno, len, owner, offset, flags);
2507 		if (error)
2508 			goto out_error;
2509 	}
2510 
2511 	trace_xfs_rmap_map_done(cur, bno, len, unwritten, oinfo);
2512 out_error:
2513 	if (error)
2514 		trace_xfs_rmap_map_error(cur, error, _RET_IP_);
2515 	return error;
2516 }
2517 
2518 /* Insert a raw rmap into the rmapbt. */
2519 int
xfs_rmap_map_raw(struct xfs_btree_cur * cur,struct xfs_rmap_irec * rmap)2520 xfs_rmap_map_raw(
2521 	struct xfs_btree_cur	*cur,
2522 	struct xfs_rmap_irec	*rmap)
2523 {
2524 	struct xfs_owner_info	oinfo;
2525 
2526 	xfs_owner_info_pack(&oinfo, rmap->rm_owner, rmap->rm_offset,
2527 			rmap->rm_flags);
2528 
2529 	if ((rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
2530 			       XFS_RMAP_UNWRITTEN)) ||
2531 	    XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
2532 		return xfs_rmap_map(cur, rmap->rm_startblock,
2533 				rmap->rm_blockcount,
2534 				rmap->rm_flags & XFS_RMAP_UNWRITTEN,
2535 				&oinfo);
2536 
2537 	return xfs_rmap_map_shared(cur, rmap->rm_startblock,
2538 			rmap->rm_blockcount,
2539 			rmap->rm_flags & XFS_RMAP_UNWRITTEN,
2540 			&oinfo);
2541 }
2542 
2543 struct xfs_rmap_query_range_info {
2544 	xfs_rmap_query_range_fn	fn;
2545 	void				*priv;
2546 };
2547 
2548 /* Format btree record and pass to our callback. */
2549 STATIC int
xfs_rmap_query_range_helper(struct xfs_btree_cur * cur,const union xfs_btree_rec * rec,void * priv)2550 xfs_rmap_query_range_helper(
2551 	struct xfs_btree_cur		*cur,
2552 	const union xfs_btree_rec	*rec,
2553 	void				*priv)
2554 {
2555 	struct xfs_rmap_query_range_info	*query = priv;
2556 	struct xfs_rmap_irec			irec;
2557 	xfs_failaddr_t				fa;
2558 
2559 	fa = xfs_rmap_btrec_to_irec(rec, &irec);
2560 	if (!fa)
2561 		fa = xfs_rmap_check_btrec(cur, &irec);
2562 	if (fa)
2563 		return xfs_rmap_complain_bad_rec(cur, fa, &irec);
2564 
2565 	return query->fn(cur, &irec, query->priv);
2566 }
2567 
2568 /* Find all rmaps between two keys. */
2569 int
xfs_rmap_query_range(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * low_rec,const struct xfs_rmap_irec * high_rec,xfs_rmap_query_range_fn fn,void * priv)2570 xfs_rmap_query_range(
2571 	struct xfs_btree_cur			*cur,
2572 	const struct xfs_rmap_irec		*low_rec,
2573 	const struct xfs_rmap_irec		*high_rec,
2574 	xfs_rmap_query_range_fn			fn,
2575 	void					*priv)
2576 {
2577 	union xfs_btree_irec			low_brec = { .r = *low_rec };
2578 	union xfs_btree_irec			high_brec = { .r = *high_rec };
2579 	struct xfs_rmap_query_range_info	query = { .priv = priv, .fn = fn };
2580 
2581 	return xfs_btree_query_range(cur, &low_brec, &high_brec,
2582 			xfs_rmap_query_range_helper, &query);
2583 }
2584 
2585 /* Find all rmaps. */
2586 int
xfs_rmap_query_all(struct xfs_btree_cur * cur,xfs_rmap_query_range_fn fn,void * priv)2587 xfs_rmap_query_all(
2588 	struct xfs_btree_cur			*cur,
2589 	xfs_rmap_query_range_fn			fn,
2590 	void					*priv)
2591 {
2592 	struct xfs_rmap_query_range_info	query;
2593 
2594 	query.priv = priv;
2595 	query.fn = fn;
2596 	return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query);
2597 }
2598 
2599 /* Commit an rmap operation into the ondisk tree. */
2600 int
__xfs_rmap_finish_intent(struct xfs_btree_cur * rcur,enum xfs_rmap_intent_type op,xfs_agblock_t bno,xfs_extlen_t len,const struct xfs_owner_info * oinfo,bool unwritten)2601 __xfs_rmap_finish_intent(
2602 	struct xfs_btree_cur		*rcur,
2603 	enum xfs_rmap_intent_type	op,
2604 	xfs_agblock_t			bno,
2605 	xfs_extlen_t			len,
2606 	const struct xfs_owner_info	*oinfo,
2607 	bool				unwritten)
2608 {
2609 	switch (op) {
2610 	case XFS_RMAP_ALLOC:
2611 	case XFS_RMAP_MAP:
2612 		return xfs_rmap_map(rcur, bno, len, unwritten, oinfo);
2613 	case XFS_RMAP_MAP_SHARED:
2614 		return xfs_rmap_map_shared(rcur, bno, len, unwritten, oinfo);
2615 	case XFS_RMAP_FREE:
2616 	case XFS_RMAP_UNMAP:
2617 		return xfs_rmap_unmap(rcur, bno, len, unwritten, oinfo);
2618 	case XFS_RMAP_UNMAP_SHARED:
2619 		return xfs_rmap_unmap_shared(rcur, bno, len, unwritten, oinfo);
2620 	case XFS_RMAP_CONVERT:
2621 		return xfs_rmap_convert(rcur, bno, len, !unwritten, oinfo);
2622 	case XFS_RMAP_CONVERT_SHARED:
2623 		return xfs_rmap_convert_shared(rcur, bno, len, !unwritten,
2624 				oinfo);
2625 	default:
2626 		ASSERT(0);
2627 		return -EFSCORRUPTED;
2628 	}
2629 }
2630 
2631 static int
xfs_rmap_finish_init_cursor(struct xfs_trans * tp,struct xfs_rmap_intent * ri,struct xfs_btree_cur ** pcur)2632 xfs_rmap_finish_init_cursor(
2633 	struct xfs_trans		*tp,
2634 	struct xfs_rmap_intent		*ri,
2635 	struct xfs_btree_cur		**pcur)
2636 {
2637 	struct xfs_perag		*pag = to_perag(ri->ri_group);
2638 	struct xfs_buf			*agbp = NULL;
2639 	int				error;
2640 
2641 	/*
2642 	 * Refresh the freelist before we start changing the rmapbt, because a
2643 	 * shape change could cause us to allocate blocks.
2644 	 */
2645 	error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
2646 	if (error) {
2647 		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
2648 		return error;
2649 	}
2650 	if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) {
2651 		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
2652 		return -EFSCORRUPTED;
2653 	}
2654 	*pcur = xfs_rmapbt_init_cursor(tp->t_mountp, tp, agbp, pag);
2655 	return 0;
2656 }
2657 
2658 static int
xfs_rtrmap_finish_init_cursor(struct xfs_trans * tp,struct xfs_rmap_intent * ri,struct xfs_btree_cur ** pcur)2659 xfs_rtrmap_finish_init_cursor(
2660 	struct xfs_trans		*tp,
2661 	struct xfs_rmap_intent		*ri,
2662 	struct xfs_btree_cur		**pcur)
2663 {
2664 	struct xfs_rtgroup		*rtg = to_rtg(ri->ri_group);
2665 
2666 	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
2667 	xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
2668 	*pcur = xfs_rtrmapbt_init_cursor(tp, rtg);
2669 	return 0;
2670 }
2671 
2672 /*
2673  * Process one of the deferred rmap operations.  We pass back the
2674  * btree cursor to maintain our lock on the rmapbt between calls.
2675  * This saves time and eliminates a buffer deadlock between the
2676  * superblock and the AGF because we'll always grab them in the same
2677  * order.
2678  */
2679 int
xfs_rmap_finish_one(struct xfs_trans * tp,struct xfs_rmap_intent * ri,struct xfs_btree_cur ** pcur)2680 xfs_rmap_finish_one(
2681 	struct xfs_trans		*tp,
2682 	struct xfs_rmap_intent		*ri,
2683 	struct xfs_btree_cur		**pcur)
2684 {
2685 	struct xfs_owner_info		oinfo;
2686 	struct xfs_mount		*mp = tp->t_mountp;
2687 	xfs_agblock_t			bno;
2688 	bool				unwritten;
2689 	int				error = 0;
2690 
2691 	trace_xfs_rmap_deferred(mp, ri);
2692 
2693 	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE))
2694 		return -EIO;
2695 
2696 	/*
2697 	 * If we haven't gotten a cursor or the cursor AG doesn't match
2698 	 * the startblock, get one now.
2699 	 */
2700 	if (*pcur != NULL && (*pcur)->bc_group != ri->ri_group) {
2701 		xfs_btree_del_cursor(*pcur, 0);
2702 		*pcur = NULL;
2703 	}
2704 	if (*pcur == NULL) {
2705 		if (ri->ri_group->xg_type == XG_TYPE_RTG)
2706 			error = xfs_rtrmap_finish_init_cursor(tp, ri, pcur);
2707 		else
2708 			error = xfs_rmap_finish_init_cursor(tp, ri, pcur);
2709 		if (error)
2710 			return error;
2711 	}
2712 
2713 	xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork,
2714 			ri->ri_bmap.br_startoff);
2715 	unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN;
2716 
2717 	bno = xfs_fsb_to_gbno(mp, ri->ri_bmap.br_startblock,
2718 			ri->ri_group->xg_type);
2719 	error = __xfs_rmap_finish_intent(*pcur, ri->ri_type, bno,
2720 			ri->ri_bmap.br_blockcount, &oinfo, unwritten);
2721 	if (error)
2722 		return error;
2723 
2724 	xfs_rmap_update_hook(tp, ri->ri_group, ri->ri_type, bno,
2725 			ri->ri_bmap.br_blockcount, unwritten, &oinfo);
2726 	return 0;
2727 }
2728 
2729 /*
2730  * Don't defer an rmap if we aren't an rmap filesystem.
2731  */
2732 static bool
xfs_rmap_update_is_needed(struct xfs_mount * mp,int whichfork)2733 xfs_rmap_update_is_needed(
2734 	struct xfs_mount	*mp,
2735 	int			whichfork)
2736 {
2737 	return xfs_has_rmapbt(mp) && whichfork != XFS_COW_FORK;
2738 }
2739 
2740 /*
2741  * Record a rmap intent; the list is kept sorted first by AG and then by
2742  * increasing age.
2743  */
2744 static void
__xfs_rmap_add(struct xfs_trans * tp,enum xfs_rmap_intent_type type,uint64_t owner,bool isrt,int whichfork,struct xfs_bmbt_irec * bmap)2745 __xfs_rmap_add(
2746 	struct xfs_trans		*tp,
2747 	enum xfs_rmap_intent_type	type,
2748 	uint64_t			owner,
2749 	bool				isrt,
2750 	int				whichfork,
2751 	struct xfs_bmbt_irec		*bmap)
2752 {
2753 	struct xfs_rmap_intent		*ri;
2754 
2755 	ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
2756 	INIT_LIST_HEAD(&ri->ri_list);
2757 	ri->ri_type = type;
2758 	ri->ri_owner = owner;
2759 	ri->ri_whichfork = whichfork;
2760 	ri->ri_bmap = *bmap;
2761 	ri->ri_realtime = isrt;
2762 
2763 	xfs_rmap_defer_add(tp, ri);
2764 }
2765 
2766 /* Map an extent into a file. */
2767 void
xfs_rmap_map_extent(struct xfs_trans * tp,struct xfs_inode * ip,int whichfork,struct xfs_bmbt_irec * PREV)2768 xfs_rmap_map_extent(
2769 	struct xfs_trans	*tp,
2770 	struct xfs_inode	*ip,
2771 	int			whichfork,
2772 	struct xfs_bmbt_irec	*PREV)
2773 {
2774 	enum xfs_rmap_intent_type type = XFS_RMAP_MAP;
2775 	bool			isrt = xfs_ifork_is_realtime(ip, whichfork);
2776 
2777 	if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
2778 		return;
2779 
2780 	if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
2781 		type = XFS_RMAP_MAP_SHARED;
2782 
2783 	__xfs_rmap_add(tp, type, ip->i_ino, isrt, whichfork, PREV);
2784 }
2785 
2786 /* Unmap an extent out of a file. */
2787 void
xfs_rmap_unmap_extent(struct xfs_trans * tp,struct xfs_inode * ip,int whichfork,struct xfs_bmbt_irec * PREV)2788 xfs_rmap_unmap_extent(
2789 	struct xfs_trans	*tp,
2790 	struct xfs_inode	*ip,
2791 	int			whichfork,
2792 	struct xfs_bmbt_irec	*PREV)
2793 {
2794 	enum xfs_rmap_intent_type type = XFS_RMAP_UNMAP;
2795 	bool			isrt = xfs_ifork_is_realtime(ip, whichfork);
2796 
2797 	if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
2798 		return;
2799 
2800 	if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
2801 		type = XFS_RMAP_UNMAP_SHARED;
2802 
2803 	__xfs_rmap_add(tp, type, ip->i_ino, isrt, whichfork, PREV);
2804 }
2805 
2806 /*
2807  * Convert a data fork extent from unwritten to real or vice versa.
2808  *
2809  * Note that tp can be NULL here as no transaction is used for COW fork
2810  * unwritten conversion.
2811  */
2812 void
xfs_rmap_convert_extent(struct xfs_mount * mp,struct xfs_trans * tp,struct xfs_inode * ip,int whichfork,struct xfs_bmbt_irec * PREV)2813 xfs_rmap_convert_extent(
2814 	struct xfs_mount	*mp,
2815 	struct xfs_trans	*tp,
2816 	struct xfs_inode	*ip,
2817 	int			whichfork,
2818 	struct xfs_bmbt_irec	*PREV)
2819 {
2820 	enum xfs_rmap_intent_type type = XFS_RMAP_CONVERT;
2821 	bool			isrt = xfs_ifork_is_realtime(ip, whichfork);
2822 
2823 	if (!xfs_rmap_update_is_needed(mp, whichfork))
2824 		return;
2825 
2826 	if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
2827 		type = XFS_RMAP_CONVERT_SHARED;
2828 
2829 	__xfs_rmap_add(tp, type, ip->i_ino, isrt, whichfork, PREV);
2830 }
2831 
2832 /* Schedule the creation of an rmap for non-file data. */
2833 void
xfs_rmap_alloc_extent(struct xfs_trans * tp,bool isrt,xfs_fsblock_t fsbno,xfs_extlen_t len,uint64_t owner)2834 xfs_rmap_alloc_extent(
2835 	struct xfs_trans	*tp,
2836 	bool			isrt,
2837 	xfs_fsblock_t		fsbno,
2838 	xfs_extlen_t		len,
2839 	uint64_t		owner)
2840 {
2841 	struct xfs_bmbt_irec	bmap;
2842 
2843 	if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
2844 		return;
2845 
2846 	bmap.br_startblock = fsbno;
2847 	bmap.br_blockcount = len;
2848 	bmap.br_startoff = 0;
2849 	bmap.br_state = XFS_EXT_NORM;
2850 
2851 	__xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, isrt, XFS_DATA_FORK, &bmap);
2852 }
2853 
2854 /* Schedule the deletion of an rmap for non-file data. */
2855 void
xfs_rmap_free_extent(struct xfs_trans * tp,bool isrt,xfs_fsblock_t fsbno,xfs_extlen_t len,uint64_t owner)2856 xfs_rmap_free_extent(
2857 	struct xfs_trans	*tp,
2858 	bool			isrt,
2859 	xfs_fsblock_t		fsbno,
2860 	xfs_extlen_t		len,
2861 	uint64_t		owner)
2862 {
2863 	struct xfs_bmbt_irec	bmap;
2864 
2865 	if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
2866 		return;
2867 
2868 	bmap.br_startblock = fsbno;
2869 	bmap.br_blockcount = len;
2870 	bmap.br_startoff = 0;
2871 	bmap.br_state = XFS_EXT_NORM;
2872 
2873 	__xfs_rmap_add(tp, XFS_RMAP_FREE, owner, isrt, XFS_DATA_FORK, &bmap);
2874 }
2875 
2876 /* Compare rmap records.  Returns -1 if a < b, 1 if a > b, and 0 if equal. */
2877 int
xfs_rmap_compare(const struct xfs_rmap_irec * a,const struct xfs_rmap_irec * b)2878 xfs_rmap_compare(
2879 	const struct xfs_rmap_irec	*a,
2880 	const struct xfs_rmap_irec	*b)
2881 {
2882 	__u64				oa;
2883 	__u64				ob;
2884 
2885 	oa = xfs_rmap_irec_offset_pack(a);
2886 	ob = xfs_rmap_irec_offset_pack(b);
2887 
2888 	if (a->rm_startblock < b->rm_startblock)
2889 		return -1;
2890 	else if (a->rm_startblock > b->rm_startblock)
2891 		return 1;
2892 	else if (a->rm_owner < b->rm_owner)
2893 		return -1;
2894 	else if (a->rm_owner > b->rm_owner)
2895 		return 1;
2896 	else if (oa < ob)
2897 		return -1;
2898 	else if (oa > ob)
2899 		return 1;
2900 	else
2901 		return 0;
2902 }
2903 
2904 /*
2905  * Scan the physical storage part of the keyspace of the reverse mapping index
2906  * and tell us if the area has no records, is fully mapped by records, or is
2907  * partially filled.
2908  */
2909 int
xfs_rmap_has_records(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,enum xbtree_recpacking * outcome)2910 xfs_rmap_has_records(
2911 	struct xfs_btree_cur	*cur,
2912 	xfs_agblock_t		bno,
2913 	xfs_extlen_t		len,
2914 	enum xbtree_recpacking	*outcome)
2915 {
2916 	union xfs_btree_key	mask = {
2917 		.rmap.rm_startblock = cpu_to_be32(-1U),
2918 	};
2919 	union xfs_btree_irec	low;
2920 	union xfs_btree_irec	high;
2921 
2922 	memset(&low, 0, sizeof(low));
2923 	low.r.rm_startblock = bno;
2924 	memset(&high, 0xFF, sizeof(high));
2925 	high.r.rm_startblock = bno + len - 1;
2926 
2927 	return xfs_btree_has_records(cur, &low, &high, &mask, outcome);
2928 }
2929 
2930 struct xfs_rmap_ownercount {
2931 	/* Owner that we're looking for. */
2932 	struct xfs_rmap_irec	good;
2933 
2934 	/* rmap search keys */
2935 	struct xfs_rmap_irec	low;
2936 	struct xfs_rmap_irec	high;
2937 
2938 	struct xfs_rmap_matches	*results;
2939 
2940 	/* Stop early if we find a nonmatch? */
2941 	bool			stop_on_nonmatch;
2942 };
2943 
2944 /* Does this rmap represent space that can have multiple owners? */
2945 static inline bool
xfs_rmap_shareable(struct xfs_mount * mp,const struct xfs_rmap_irec * rmap)2946 xfs_rmap_shareable(
2947 	struct xfs_mount		*mp,
2948 	const struct xfs_rmap_irec	*rmap)
2949 {
2950 	if (!xfs_has_reflink(mp))
2951 		return false;
2952 	if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
2953 		return false;
2954 	if (rmap->rm_flags & (XFS_RMAP_ATTR_FORK |
2955 			      XFS_RMAP_BMBT_BLOCK))
2956 		return false;
2957 	return true;
2958 }
2959 
2960 static inline void
xfs_rmap_ownercount_init(struct xfs_rmap_ownercount * roc,xfs_agblock_t bno,xfs_extlen_t len,const struct xfs_owner_info * oinfo,struct xfs_rmap_matches * results)2961 xfs_rmap_ownercount_init(
2962 	struct xfs_rmap_ownercount	*roc,
2963 	xfs_agblock_t			bno,
2964 	xfs_extlen_t			len,
2965 	const struct xfs_owner_info	*oinfo,
2966 	struct xfs_rmap_matches		*results)
2967 {
2968 	memset(roc, 0, sizeof(*roc));
2969 	roc->results = results;
2970 
2971 	roc->low.rm_startblock = bno;
2972 	memset(&roc->high, 0xFF, sizeof(roc->high));
2973 	roc->high.rm_startblock = bno + len - 1;
2974 
2975 	memset(results, 0, sizeof(*results));
2976 	roc->good.rm_startblock = bno;
2977 	roc->good.rm_blockcount = len;
2978 	roc->good.rm_owner = oinfo->oi_owner;
2979 	roc->good.rm_offset = oinfo->oi_offset;
2980 	if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
2981 		roc->good.rm_flags |= XFS_RMAP_ATTR_FORK;
2982 	if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
2983 		roc->good.rm_flags |= XFS_RMAP_BMBT_BLOCK;
2984 }
2985 
2986 /* Figure out if this is a match for the owner. */
2987 STATIC int
xfs_rmap_count_owners_helper(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)2988 xfs_rmap_count_owners_helper(
2989 	struct xfs_btree_cur		*cur,
2990 	const struct xfs_rmap_irec	*rec,
2991 	void				*priv)
2992 {
2993 	struct xfs_rmap_ownercount	*roc = priv;
2994 	struct xfs_rmap_irec		check = *rec;
2995 	unsigned int			keyflags;
2996 	bool				filedata;
2997 	int64_t				delta;
2998 
2999 	filedata = !XFS_RMAP_NON_INODE_OWNER(check.rm_owner) &&
3000 		   !(check.rm_flags & XFS_RMAP_BMBT_BLOCK);
3001 
3002 	/* Trim the part of check that comes before the comparison range. */
3003 	delta = (int64_t)roc->good.rm_startblock - check.rm_startblock;
3004 	if (delta > 0) {
3005 		check.rm_startblock += delta;
3006 		check.rm_blockcount -= delta;
3007 		if (filedata)
3008 			check.rm_offset += delta;
3009 	}
3010 
3011 	/* Trim the part of check that comes after the comparison range. */
3012 	delta = (check.rm_startblock + check.rm_blockcount) -
3013 		(roc->good.rm_startblock + roc->good.rm_blockcount);
3014 	if (delta > 0)
3015 		check.rm_blockcount -= delta;
3016 
3017 	/* Don't care about unwritten status for establishing ownership. */
3018 	keyflags = check.rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK);
3019 
3020 	if (check.rm_startblock	== roc->good.rm_startblock &&
3021 	    check.rm_blockcount	== roc->good.rm_blockcount &&
3022 	    check.rm_owner	== roc->good.rm_owner &&
3023 	    check.rm_offset	== roc->good.rm_offset &&
3024 	    keyflags		== roc->good.rm_flags) {
3025 		roc->results->matches++;
3026 	} else {
3027 		roc->results->non_owner_matches++;
3028 		if (xfs_rmap_shareable(cur->bc_mp, &roc->good) ^
3029 		    xfs_rmap_shareable(cur->bc_mp, &check))
3030 			roc->results->bad_non_owner_matches++;
3031 	}
3032 
3033 	if (roc->results->non_owner_matches && roc->stop_on_nonmatch)
3034 		return -ECANCELED;
3035 
3036 	return 0;
3037 }
3038 
3039 /* Count the number of owners and non-owners of this range of blocks. */
3040 int
xfs_rmap_count_owners(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,const struct xfs_owner_info * oinfo,struct xfs_rmap_matches * results)3041 xfs_rmap_count_owners(
3042 	struct xfs_btree_cur		*cur,
3043 	xfs_agblock_t			bno,
3044 	xfs_extlen_t			len,
3045 	const struct xfs_owner_info	*oinfo,
3046 	struct xfs_rmap_matches		*results)
3047 {
3048 	struct xfs_rmap_ownercount	roc;
3049 	int				error;
3050 
3051 	xfs_rmap_ownercount_init(&roc, bno, len, oinfo, results);
3052 	error = xfs_rmap_query_range(cur, &roc.low, &roc.high,
3053 			xfs_rmap_count_owners_helper, &roc);
3054 	if (error)
3055 		return error;
3056 
3057 	/*
3058 	 * There can't be any non-owner rmaps that conflict with the given
3059 	 * owner if we didn't find any rmaps matching the owner.
3060 	 */
3061 	if (!results->matches)
3062 		results->bad_non_owner_matches = 0;
3063 
3064 	return 0;
3065 }
3066 
3067 /*
3068  * Given an extent and some owner info, can we find records overlapping
3069  * the extent whose owner info does not match the given owner?
3070  */
3071 int
xfs_rmap_has_other_keys(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,const struct xfs_owner_info * oinfo,bool * has_other)3072 xfs_rmap_has_other_keys(
3073 	struct xfs_btree_cur		*cur,
3074 	xfs_agblock_t			bno,
3075 	xfs_extlen_t			len,
3076 	const struct xfs_owner_info	*oinfo,
3077 	bool				*has_other)
3078 {
3079 	struct xfs_rmap_matches		res;
3080 	struct xfs_rmap_ownercount	roc;
3081 	int				error;
3082 
3083 	xfs_rmap_ownercount_init(&roc, bno, len, oinfo, &res);
3084 	roc.stop_on_nonmatch = true;
3085 
3086 	error = xfs_rmap_query_range(cur, &roc.low, &roc.high,
3087 			xfs_rmap_count_owners_helper, &roc);
3088 	if (error == -ECANCELED) {
3089 		*has_other = true;
3090 		return 0;
3091 	}
3092 	if (error)
3093 		return error;
3094 
3095 	*has_other = false;
3096 	return 0;
3097 }
3098 
3099 const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = {
3100 	.oi_owner = XFS_RMAP_OWN_NULL,
3101 };
3102 const struct xfs_owner_info XFS_RMAP_OINFO_ANY_OWNER = {
3103 	.oi_owner = XFS_RMAP_OWN_UNKNOWN,
3104 };
3105 const struct xfs_owner_info XFS_RMAP_OINFO_FS = {
3106 	.oi_owner = XFS_RMAP_OWN_FS,
3107 };
3108 const struct xfs_owner_info XFS_RMAP_OINFO_LOG = {
3109 	.oi_owner = XFS_RMAP_OWN_LOG,
3110 };
3111 const struct xfs_owner_info XFS_RMAP_OINFO_AG = {
3112 	.oi_owner = XFS_RMAP_OWN_AG,
3113 };
3114 const struct xfs_owner_info XFS_RMAP_OINFO_INOBT = {
3115 	.oi_owner = XFS_RMAP_OWN_INOBT,
3116 };
3117 const struct xfs_owner_info XFS_RMAP_OINFO_INODES = {
3118 	.oi_owner = XFS_RMAP_OWN_INODES,
3119 };
3120 const struct xfs_owner_info XFS_RMAP_OINFO_REFC = {
3121 	.oi_owner = XFS_RMAP_OWN_REFC,
3122 };
3123 const struct xfs_owner_info XFS_RMAP_OINFO_COW = {
3124 	.oi_owner = XFS_RMAP_OWN_COW,
3125 };
3126 
3127 int __init
xfs_rmap_intent_init_cache(void)3128 xfs_rmap_intent_init_cache(void)
3129 {
3130 	xfs_rmap_intent_cache = kmem_cache_create("xfs_rmap_intent",
3131 			sizeof(struct xfs_rmap_intent),
3132 			0, 0, NULL);
3133 
3134 	return xfs_rmap_intent_cache != NULL ? 0 : -ENOMEM;
3135 }
3136 
3137 void
xfs_rmap_intent_destroy_cache(void)3138 xfs_rmap_intent_destroy_cache(void)
3139 {
3140 	kmem_cache_destroy(xfs_rmap_intent_cache);
3141 	xfs_rmap_intent_cache = NULL;
3142 }
3143