xref: /linux/fs/erofs/zmap.c (revision e87827da8c351db0de504534e6aa17be3014bc25)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2018-2019 HUAWEI, Inc.
4  *             https://www.huawei.com/
5  */
6 #include "internal.h"
7 #include <linux/unaligned.h>
8 #include <trace/events/erofs.h>
9 
10 struct z_erofs_maprecorder {
11 	struct inode *inode;
12 	struct erofs_map_blocks *map;
13 	u64 lcn;
14 	/* compression extent information gathered */
15 	u8  type, headtype;
16 	u16 clusterofs;
17 	u16 delta[2];
18 	erofs_blk_t pblk;
19 	erofs_off_t nextpackoff;
20 	int compressedblks;
21 	bool partialref, in_mbox;
22 };
23 
24 static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, u64 lcn)
25 {
26 	struct inode *const inode = m->inode;
27 	struct erofs_inode *const vi = EROFS_I(inode);
28 	const erofs_off_t pos = Z_EROFS_FULL_INDEX_START(erofs_iloc(inode) +
29 			vi->inode_isize + vi->xattr_isize) +
30 			lcn * sizeof(struct z_erofs_lcluster_index);
31 	struct z_erofs_lcluster_index *di;
32 	unsigned int advise;
33 
34 	di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox);
35 	if (IS_ERR(di))
36 		return PTR_ERR(di);
37 	m->lcn = lcn;
38 	m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index);
39 
40 	advise = le16_to_cpu(di->di_advise);
41 	m->type = advise & Z_EROFS_LI_LCLUSTER_TYPE_MASK;
42 	if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
43 		m->clusterofs = 1 << vi->z_lclusterbits;
44 		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
45 		if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
46 			if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
47 					Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
48 				DBG_BUGON(1);
49 				return -EFSCORRUPTED;
50 			}
51 			m->compressedblks = m->delta[0] & ~Z_EROFS_LI_D0_CBLKCNT;
52 			m->delta[0] = 1;
53 		}
54 		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
55 	} else {
56 		m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF);
57 		m->clusterofs = le16_to_cpu(di->di_clusterofs);
58 		if (advise & Z_EROFS_LI_HOLE) {
59 			m->compressedblks = 0;
60 			m->pblk = EROFS_NULL_ADDR;
61 		} else {
62 			m->pblk = le32_to_cpu(di->di_u.blkaddr);
63 		}
64 	}
65 	return 0;
66 }
67 
68 static unsigned int decode_compactedbits(unsigned int lobits,
69 					 u8 *in, unsigned int pos, u8 *type)
70 {
71 	const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
72 	const unsigned int lo = v & ((1 << lobits) - 1);
73 
74 	*type = (v >> lobits) & 3;
75 	return lo;
76 }
77 
78 static int get_compacted_la_distance(unsigned int lobits,
79 				     unsigned int encodebits,
80 				     unsigned int vcnt, u8 *in, int i)
81 {
82 	unsigned int lo, d1 = 0;
83 	u8 type;
84 
85 	DBG_BUGON(i >= vcnt);
86 
87 	do {
88 		lo = decode_compactedbits(lobits, in, encodebits * i, &type);
89 
90 		if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
91 			return d1;
92 		++d1;
93 	} while (++i < vcnt);
94 
95 	/* vcnt - 1 (Z_EROFS_LCLUSTER_TYPE_NONHEAD) item */
96 	if (!(lo & Z_EROFS_LI_D0_CBLKCNT))
97 		d1 += lo - 1;
98 	return d1;
99 }
100 
/*
 * Load the compact index of lcluster @lcn into @m.
 *
 * Compact indexes are grouped into packs of (vcnt << amortizedshift) bytes:
 * 2 entries per 8-byte pack (4 bytes amortized per lcluster) or 16 entries
 * per 32-byte pack (2 bytes amortized per lcluster).  The last __le32 of
 * each pack is a base block address; the bits before it hold the entries,
 * encodebits bits each.
 *
 * @lookahead: also compute m->delta[1] when the entry is NONHEAD.
 *
 * Returns 0 on success, -EINVAL if @lcn is out of range or lclusterbits is
 * larger than 14, -EOPNOTSUPP for an unsupported pack/lclusterbits pairing,
 * -EFSCORRUPTED on inconsistent entries, or the erofs_read_metabuf() error.
 */
static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
					 u64 lcn, bool lookahead)
{
	struct inode *const inode = m->inode;
	struct erofs_inode *const vi = EROFS_I(inode);
	const erofs_off_t ebase = Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) +
			vi->inode_isize + vi->xattr_isize);
	const unsigned int lclusterbits = vi->z_lclusterbits;
	const unsigned int totalidx = erofs_iblks(inode);
	unsigned int compacted_4b_initial, compacted_2b, amortizedshift;
	unsigned int vcnt, lo, lobits, encodebits, nblk, bytes;
	bool big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
	erofs_off_t pos;
	u8 *in, type;
	int i;

	if (lcn >= totalidx || lclusterbits > 14)
		return -EINVAL;

	m->lcn = lcn;
	/* used to align to 32-byte (compacted_2b) alignment */
	compacted_4b_initial = ((32 - ebase % 32) / 4) & 7;
	compacted_2b = 0;
	if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
	    compacted_4b_initial < totalidx)
		compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);

	/*
	 * Layout: [leading 4-byte entries][2-byte entries][trailing 4-byte
	 * entries].  Locate the region @lcn falls in and rebase @lcn to it.
	 */
	pos = ebase;
	amortizedshift = 2;	/* compact_4b */
	if (lcn >= compacted_4b_initial) {
		pos += compacted_4b_initial * 4;
		lcn -= compacted_4b_initial;
		if (lcn < compacted_2b) {
			amortizedshift = 1;
		} else {
			pos += compacted_2b * 2;
			lcn -= compacted_2b;
		}
	}
	pos += lcn * (1 << amortizedshift);

	/* figure out the lcluster count in this pack */
	if (1 << amortizedshift == 4 && lclusterbits <= 14)
		vcnt = 2;
	else if (1 << amortizedshift == 2 && lclusterbits <= 12)
		vcnt = 16;
	else
		return -EOPNOTSUPP;

	in = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox);
	if (IS_ERR(in))
		return PTR_ERR(in);

	/* it doesn't equal to round_up(..) */
	m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
			 (vcnt << amortizedshift);
	/* an entry must be wide enough to hold the CBLKCNT flag bit as well */
	lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U);
	encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
	/* rewind @in to the start of the pack; @i is the entry index in it */
	bytes = pos & ((vcnt << amortizedshift) - 1);
	in -= bytes;
	i = bytes >> amortizedshift;

	lo = decode_compactedbits(lobits, in, encodebits * i, &type);
	m->type = type;
	if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
		m->clusterofs = 1 << lclusterbits;

		/* figure out lookahead_distance: delta[1] if needed */
		if (lookahead)
			m->delta[1] = get_compacted_la_distance(lobits,
						encodebits, vcnt, in, i);
		if (lo & Z_EROFS_LI_D0_CBLKCNT) {
			if (!big_pcluster) {
				DBG_BUGON(1);
				return -EFSCORRUPTED;
			}
			m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT;
			m->delta[0] = 1;
			return 0;
		} else if (i + 1 != (int)vcnt) {
			m->delta[0] = lo;
			return 0;
		}
		/*
		 * since the last lcluster in the pack is special,
		 * of which lo saves delta[1] rather than delta[0].
		 * Hence, get delta[0] by the previous lcluster indirectly.
		 */
		lo = decode_compactedbits(lobits, in,
					  encodebits * (i - 1), &type);
		if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
			lo = 0;
		else if (lo & Z_EROFS_LI_D0_CBLKCNT)
			lo = 1;
		m->delta[0] = lo + 1;
		return 0;
	}
	/* for HEAD-like entries the low bits are the cluster offset */
	m->clusterofs = lo;
	m->delta[0] = 0;
	/* figure out blkaddr (pblk) for HEAD lclusters */
	if (!big_pcluster) {
		/*
		 * Walk the earlier entries of the pack backwards: a NONHEAD
		 * entry jumps back by its distance, and nblk grows by one each
		 * time the resulting index is still inside the pack.
		 */
		nblk = 1;
		while (i > 0) {
			--i;
			lo = decode_compactedbits(lobits, in,
						  encodebits * i, &type);
			if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD)
				i -= lo;

			if (i >= 0)
				++nblk;
		}
	} else {
		nblk = 0;
		while (i > 0) {
			--i;
			lo = decode_compactedbits(lobits, in,
						  encodebits * i, &type);
			if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
				if (lo & Z_EROFS_LI_D0_CBLKCNT) {
					/* step over the preceding entry too */
					--i;
					nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT;
					continue;
				}
				/* bigpcluster shouldn't have plain d0 == 1 */
				if (lo <= 1) {
					DBG_BUGON(1);
					return -EFSCORRUPTED;
				}
				/*
				 * after the loop's --i, the next entry decoded
				 * is the one at distance lo - 1 from here
				 */
				i -= lo - 2;
				continue;
			}
			++nblk;
		}
	}
	/* the pack's trailing __le32 is the base block address */
	in += (vcnt << amortizedshift) - sizeof(__le32);
	m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
	return 0;
}
240 
241 static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
242 					   u64 lcn, bool lookahead)
243 {
244 	struct erofs_inode *vi = EROFS_I(m->inode);
245 	int err;
246 
247 	if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT) {
248 		err = z_erofs_load_compact_lcluster(m, lcn, lookahead);
249 	} else {
250 		DBG_BUGON(vi->datalayout != EROFS_INODE_COMPRESSED_FULL);
251 		err = z_erofs_load_full_lcluster(m, lcn);
252 	}
253 	if (err)
254 		return err;
255 
256 	if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) {
257 		erofs_err(m->inode->i_sb, "unknown type %u @ lcn %llu of nid %llu",
258 			  m->type, lcn, EROFS_I(m->inode)->nid);
259 		DBG_BUGON(1);
260 		return -EOPNOTSUPP;
261 	} else if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD &&
262 		   m->clusterofs >= (1 << vi->z_lclusterbits)) {
263 		DBG_BUGON(1);
264 		return -EFSCORRUPTED;
265 	}
266 	return 0;
267 }
268 
269 static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
270 				   unsigned int lookback_distance)
271 {
272 	struct super_block *sb = m->inode->i_sb;
273 	struct erofs_inode *const vi = EROFS_I(m->inode);
274 	const unsigned int lclusterbits = vi->z_lclusterbits;
275 
276 	while (m->lcn >= lookback_distance) {
277 		u64 lcn = m->lcn - lookback_distance;
278 		int err;
279 
280 		if (!lookback_distance)
281 			break;
282 
283 		err = z_erofs_load_lcluster_from_disk(m, lcn, false);
284 		if (err)
285 			return err;
286 		if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
287 			lookback_distance = m->delta[0];
288 			continue;
289 		}
290 		m->headtype = m->type;
291 		m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
292 		return 0;
293 	}
294 	erofs_err(sb, "bogus lookback distance %u @ lcn %llu of nid %llu",
295 		  lookback_distance, m->lcn, vi->nid);
296 	DBG_BUGON(1);
297 	return -EFSCORRUPTED;
298 }
299 
300 static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
301 					    unsigned int initial_lcn)
302 {
303 	struct inode *inode = m->inode;
304 	struct super_block *sb = inode->i_sb;
305 	struct erofs_inode *vi = EROFS_I(inode);
306 	bool bigpcl1 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
307 	bool bigpcl2 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2;
308 	u64 lcn = m->lcn + 1;
309 	int err;
310 
311 	DBG_BUGON(m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
312 	DBG_BUGON(m->type != m->headtype);
313 
314 	if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) ||
315 	    ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
316 	      m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) ||
317 	    (lcn << vi->z_lclusterbits) >= inode->i_size)
318 		if (m->compressedblks < 0)
319 			m->compressedblks = 1;
320 
321 	if (m->compressedblks >= 0)
322 		goto out;
323 
324 	err = z_erofs_load_lcluster_from_disk(m, lcn, false);
325 	if (err)
326 		return err;
327 
328 	/*
329 	 * If the 1st NONHEAD lcluster has already been handled initially w/o
330 	 * valid compressedblks, which means at least it mustn't be CBLKCNT, or
331 	 * an internal implemenatation error is detected.
332 	 *
333 	 * The following code can also handle it properly anyway, but let's
334 	 * BUG_ON in the debugging mode only for developers to notice that.
335 	 */
336 	DBG_BUGON(lcn == initial_lcn &&
337 		  m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
338 
339 	if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
340 		/*
341 		 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
342 		 * rather than CBLKCNT, it's a 1 block-sized pcluster.
343 		 */
344 		if (m->compressedblks < 0)
345 			m->compressedblks = 1;
346 	} else if (m->delta[0] != 1 || m->compressedblks < 0) {
347 		erofs_err(sb, "bogus CBLKCNT @ lcn %llu of nid %llu", lcn, vi->nid);
348 		DBG_BUGON(1);
349 		return -EFSCORRUPTED;
350 	}
351 
352 out:
353 	if (!m->compressedblks)
354 		m->map->m_flags &= ~EROFS_MAP_MAPPED;
355 	m->map->m_plen = erofs_pos(sb, m->compressedblks);
356 	return 0;
357 }
358 
359 static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
360 {
361 	struct inode *inode = m->inode;
362 	struct erofs_inode *vi = EROFS_I(inode);
363 	struct erofs_map_blocks *map = m->map;
364 	unsigned int lclusterbits = vi->z_lclusterbits;
365 	u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
366 	int err;
367 
368 	while (1) {
369 		/* handle the last EOF pcluster (no next HEAD lcluster) */
370 		if ((lcn << lclusterbits) >= inode->i_size) {
371 			map->m_llen = inode->i_size - map->m_la;
372 			return 0;
373 		}
374 
375 		err = z_erofs_load_lcluster_from_disk(m, lcn, true);
376 		if (err)
377 			return err;
378 
379 		if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
380 			/* work around invalid d1 generated by pre-1.0 mkfs */
381 			if (unlikely(!m->delta[1])) {
382 				m->delta[1] = 1;
383 				DBG_BUGON(1);
384 			}
385 		} else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) {
386 			if (lcn != headlcn)
387 				break;	/* ends at the next HEAD lcluster */
388 			m->delta[1] = 1;
389 		}
390 		lcn += m->delta[1];
391 	}
392 	map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
393 	return 0;
394 }
395 
396 static int z_erofs_map_blocks_fo(struct inode *inode,
397 				 struct erofs_map_blocks *map, int flags)
398 {
399 	struct erofs_inode *vi = EROFS_I(inode);
400 	struct super_block *sb = inode->i_sb;
401 	bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
402 	bool ztailpacking = vi->z_idata_size;
403 	unsigned int lclusterbits = vi->z_lclusterbits;
404 	struct z_erofs_maprecorder m = {
405 		.inode = inode,
406 		.map = map,
407 		.in_mbox = erofs_inode_in_metabox(inode),
408 		.compressedblks = -1,
409 	};
410 	unsigned int endoff;
411 	unsigned long initial_lcn;
412 	unsigned long long ofs, end;
413 	int err;
414 
415 	ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
416 	if (fragment && !(flags & EROFS_GET_BLOCKS_FINDTAIL) &&
417 	    !vi->z_tailextent_headlcn) {
418 		map->m_la = 0;
419 		map->m_llen = inode->i_size;
420 		map->m_flags = EROFS_MAP_FRAGMENT;
421 		return 0;
422 	}
423 	initial_lcn = ofs >> lclusterbits;
424 	endoff = ofs & ((1 << lclusterbits) - 1);
425 
426 	err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false);
427 	if (err)
428 		goto unmap_out;
429 
430 	if ((flags & EROFS_GET_BLOCKS_FINDTAIL) && ztailpacking)
431 		vi->z_fragmentoff = m.nextpackoff;
432 	map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_PARTIAL_MAPPED;
433 	end = (m.lcn + 1ULL) << lclusterbits;
434 
435 	if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && endoff >= m.clusterofs) {
436 		m.headtype = m.type;
437 		map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
438 		/*
439 		 * For ztailpacking files, in order to inline data more
440 		 * effectively, special EOF lclusters are now supported
441 		 * which can have three parts at most.
442 		 */
443 		if (ztailpacking && end > inode->i_size)
444 			end = inode->i_size;
445 	} else {
446 		if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
447 			end = (m.lcn << lclusterbits) | m.clusterofs;
448 			map->m_flags &= ~EROFS_MAP_PARTIAL_MAPPED;
449 			m.delta[0] = 1;
450 		}
451 		/* get the corresponding first chunk */
452 		err = z_erofs_extent_lookback(&m, m.delta[0]);
453 		if (err)
454 			goto unmap_out;
455 	}
456 	if (m.partialref)
457 		map->m_flags |= EROFS_MAP_PARTIAL_REF;
458 	map->m_llen = end - map->m_la;
459 
460 	if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
461 		vi->z_tailextent_headlcn = m.lcn;
462 		/* for non-compact indexes, fragmentoff is 64 bits */
463 		if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
464 			vi->z_fragmentoff |= (u64)m.pblk << 32;
465 	}
466 	if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
467 		map->m_flags |= EROFS_MAP_META;
468 		map->m_pa = vi->z_fragmentoff;
469 		map->m_plen = vi->z_idata_size;
470 		if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
471 			erofs_err(sb, "ztailpacking inline data across blocks @ nid %llu",
472 				  vi->nid);
473 			err = -EFSCORRUPTED;
474 			goto unmap_out;
475 		}
476 	} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
477 		map->m_flags = EROFS_MAP_FRAGMENT;
478 	} else {
479 		map->m_pa = erofs_pos(sb, m.pblk);
480 		err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
481 		if (err)
482 			goto unmap_out;
483 	}
484 
485 	if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) {
486 		if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
487 			map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED;
488 		else
489 			map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
490 	} else if (m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
491 		map->m_algorithmformat = vi->z_algorithmtype[1];
492 	} else {
493 		map->m_algorithmformat = vi->z_algorithmtype[0];
494 	}
495 
496 	if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
497 	    ((flags & EROFS_GET_BLOCKS_READMORE) &&
498 	     (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA ||
499 	      map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE ||
500 	      map->m_algorithmformat == Z_EROFS_COMPRESSION_ZSTD) &&
501 	      map->m_llen >= i_blocksize(inode))) {
502 		err = z_erofs_get_extent_decompressedlen(&m);
503 		if (!err)
504 			map->m_flags &= ~EROFS_MAP_PARTIAL_MAPPED;
505 	}
506 
507 unmap_out:
508 	erofs_unmap_metabuf(&m.map->buf);
509 	return err;
510 }
511 
/*
 * Map a logical offset of an inode that uses extent records
 * (Z_EROFS_ADVISE_EXTENTS) rather than per-lcluster indexes.
 *
 * The record size comes from z_erofs_extent_recsize(vi->z_advise) and
 * selects one of three lookups:
 *  - records that stop before pstart_lo: a leading __le64 gives the first
 *    physical address and records are walked from logical offset 0, with
 *    the physical address advanced by each record's plen;
 *  - records that stop before pstart_hi: one record per lcluster, located
 *    directly from map->m_la;
 *  - larger records: binary search over vi->z_extents records by their
 *    logical start.
 *
 * On success map->m_la/m_llen (and, for mapped extents, m_pa/m_plen/
 * m_algorithmformat) describe the extent.  Returns 0, -EFSCORRUPTED, or the
 * erofs_read_metabuf() error.
 */
static int z_erofs_map_blocks_ext(struct inode *inode,
				  struct erofs_map_blocks *map, int flags)
{
	struct erofs_inode *vi = EROFS_I(inode);
	struct super_block *sb = inode->i_sb;
	bool interlaced = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
	unsigned int recsz = z_erofs_extent_recsize(vi->z_advise);
	erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) +
				   vi->inode_isize + vi->xattr_isize), recsz);
	unsigned int bmask = sb->s_blocksize - 1;
	bool in_mbox = erofs_inode_in_metabox(inode);
	erofs_off_t lend = inode->i_size;
	erofs_off_t l, r, mid, pa, la, lstart;
	struct z_erofs_extent *ext;
	unsigned int fmt;
	bool last;

	map->m_flags = 0;
	if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) {
		if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) {
			/* plen-only records: start address stored up front */
			ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox);
			if (IS_ERR(ext))
				return PTR_ERR(ext);
			pa = le64_to_cpu(*(__le64 *)ext);
			pos += sizeof(__le64);
			lstart = 0;
		} else {
			/* records carry pstart_lo: jump to the right lcluster */
			lstart = round_down(map->m_la, 1 << vi->z_lclusterbits);
			pos += (lstart >> vi->z_lclusterbits) * recsz;
			pa = EROFS_NULL_ADDR;
		}

		/* consume records until the one covering map->m_la is read */
		for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) {
			ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox);
			if (IS_ERR(ext))
				return PTR_ERR(ext);
			map->m_plen = le32_to_cpu(ext->plen);
			if (pa != EROFS_NULL_ADDR) {
				map->m_pa = pa;
				pa += map->m_plen & Z_EROFS_EXTENT_PLEN_MASK;
			} else {
				map->m_pa = le32_to_cpu(ext->pstart_lo);
			}
			pos += recsz;
		}
		/* lstart is now one lcluster past the extent that was found */
		last = (lstart >= round_up(lend, 1 << vi->z_lclusterbits));
		lend = min(lstart, lend);
		lstart -= 1 << vi->z_lclusterbits;
	} else {
		/*
		 * Binary search: lstart/m_plen/m_pa track the closest record
		 * starting at or before map->m_la, lend the closest start
		 * after it.
		 */
		lstart = lend;
		for (l = 0, r = vi->z_extents; l < r; ) {
			mid = l + (r - l) / 2;
			ext = erofs_read_metabuf(&map->buf, sb,
						 pos + mid * recsz, in_mbox);
			if (IS_ERR(ext))
				return PTR_ERR(ext);

			la = le32_to_cpu(ext->lstart_lo);
			pa = le32_to_cpu(ext->pstart_lo) |
				(u64)le32_to_cpu(ext->pstart_hi) << 32;
			if (recsz > offsetof(struct z_erofs_extent, lstart_hi))
				la |= (u64)le32_to_cpu(ext->lstart_hi) << 32;

			if (la > map->m_la) {
				r = mid;
				/* starts are expected to shrink as r moves left */
				if (la > lend) {
					DBG_BUGON(1);
					return -EFSCORRUPTED;
				}
				lend = la;
			} else {
				l = mid + 1;
				/* exact hit: only the next record is still needed */
				if (map->m_la == la)
					r = min(l + 1, r);
				lstart = la;
				map->m_plen = le32_to_cpu(ext->plen);
				map->m_pa = pa;
			}
		}
		last = (l >= vi->z_extents);
	}

	if (lstart < lend) {
		map->m_la = lstart;
		if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
			/* the final record describes the fragment, not an extent */
			map->m_flags = EROFS_MAP_FRAGMENT;
			vi->z_fragmentoff = map->m_plen;
			if (recsz > offsetof(struct z_erofs_extent, pstart_lo))
				vi->z_fragmentoff |= map->m_pa << 32;
		} else if (map->m_plen & Z_EROFS_EXTENT_PLEN_MASK) {
			map->m_flags |= EROFS_MAP_MAPPED;
			/* bits above the length encode the format (0: none) */
			fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT;
			if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL)
				map->m_flags |= EROFS_MAP_PARTIAL_REF;
			map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK;
			if (fmt)
				map->m_algorithmformat = fmt - 1;
			else if (interlaced && !((map->m_pa | map->m_plen) & bmask))
				map->m_algorithmformat =
					Z_EROFS_COMPRESSION_INTERLACED;
			else
				map->m_algorithmformat =
					Z_EROFS_COMPRESSION_SHIFTED;
		}
	}
	map->m_llen = lend - map->m_la;
	return 0;
}
620 
/*
 * Parse the on-disk compression map header of @inode once and cache the
 * result in the in-memory erofs inode.
 *
 * Initialisation is serialised with the EROFS_I_BL_Z_BIT bit lock and is
 * published by setting EROFS_I_Z_INITED_BIT; later callers return early
 * once that bit is observed.  @map only lends its metabuf for reading the
 * header.
 *
 * Returns 0 on success, -ERESTARTSYS if waiting for the bit lock was
 * interrupted, -EFSCORRUPTED on inconsistent big-pcluster advise bits, or
 * the error from reading the header / locating the tail extent.
 */
static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map)
{
	struct erofs_inode *const vi = EROFS_I(inode);
	struct super_block *const sb = inode->i_sb;
	struct z_erofs_map_header *h;
	erofs_off_t pos;
	int err = 0;

	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
		/*
		 * paired with smp_mb() at the end of the function to ensure
		 * fields will only be observed after the bit is set.
		 */
		smp_mb();
		return 0;
	}

	if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
		return -ERESTARTSYS;

	/* re-check under the lock: another task may have finished meanwhile */
	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
		goto out_unlock;

	/* the map header follows the inode and its xattrs, 8-byte aligned */
	pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
	h = erofs_read_metabuf(&map->buf, sb, pos, erofs_inode_in_metabox(inode));
	if (IS_ERR(h)) {
		err = PTR_ERR(h);
		goto out_unlock;
	}

	/*
	 * if the highest bit of the 8-byte map header is set, the whole file
	 * is stored in the packed inode. The rest bits keeps z_fragmentoff.
	 */
	if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
		vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
		vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
		vi->z_tailextent_headlcn = 0;
		goto done;
	}
	vi->z_advise = le16_to_cpu(h->h_advise);
	vi->z_lclusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 15);
	/* extent-record inodes only need the extent count from the header */
	if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL &&
	    (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) {
		vi->z_extents = le32_to_cpu(h->h_extents_lo) |
			((u64)le16_to_cpu(h->h_extents_hi) << 32);
		goto done;
	}

	/* low nibble: algorithm for slot 0, high nibble: algorithm for slot 1 */
	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
	if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
		vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
	else if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER)
		vi->z_idata_size = le16_to_cpu(h->h_idata_size);

	if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
	    vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
			    Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
		erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
			  vi->nid);
		err = -EFSCORRUPTED;
		goto out_unlock;
	}
	/* compact indexes need both big-pcluster bits set or both clear */
	if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT &&
	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
		erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
			  vi->nid);
		err = -EFSCORRUPTED;
		goto out_unlock;
	}

	/*
	 * Inline tail data or a fragment: run a FINDTAIL mapping with a
	 * temporary map so that the tail extent information gets recorded.
	 */
	if (vi->z_idata_size ||
	    (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
		struct erofs_map_blocks tm = {
			.buf = __EROFS_BUF_INITIALIZER
		};

		err = z_erofs_map_blocks_fo(inode, &tm,
					    EROFS_GET_BLOCKS_FINDTAIL);
		erofs_put_metabuf(&tm.buf);
		if (err < 0)
			goto out_unlock;
	}
done:
	/* paired with smp_mb() at the beginning of the function */
	smp_mb();
	set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
out_unlock:
	clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
	return err;
}
714 
715 static int z_erofs_map_sanity_check(struct inode *inode,
716 				    struct erofs_map_blocks *map)
717 {
718 	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
719 	u64 pend;
720 
721 	if (!(map->m_flags & EROFS_MAP_MAPPED))
722 		return 0;
723 	if (unlikely(map->m_algorithmformat >= Z_EROFS_COMPRESSION_RUNTIME_MAX)) {
724 		erofs_err(inode->i_sb, "unknown algorithm %d @ pos %llu for nid %llu, please upgrade kernel",
725 			  map->m_algorithmformat, map->m_la, EROFS_I(inode)->nid);
726 		return -EOPNOTSUPP;
727 	}
728 
729 	if (map->m_algorithmformat < Z_EROFS_COMPRESSION_MAX) {
730 		if (!(sbi->available_compr_algs & BIT(map->m_algorithmformat))) {
731 			erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu",
732 				  map->m_algorithmformat, EROFS_I(inode)->nid);
733 			return -EFSCORRUPTED;
734 		}
735 		if (EROFS_MAP_FULL(map->m_flags) && map->m_llen < map->m_plen) {
736 			erofs_err(inode->i_sb, "too much compressed data @ la %llu of nid %llu",
737 				  map->m_la, EROFS_I(inode)->nid);
738 			return -EFSCORRUPTED;
739 		}
740 	} else if (map->m_llen > map->m_plen) {
741 		erofs_err(inode->i_sb, "not enough plain data on disk @ la %llu of nid %llu",
742 			  map->m_la, EROFS_I(inode)->nid);
743 		return -EFSCORRUPTED;
744 	}
745 	if (unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE ||
746 		     map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE))
747 		return -EOPNOTSUPP;
748 	/* Filesystems beyond 48-bit physical block addresses are invalid */
749 	if (unlikely(check_add_overflow(map->m_pa, map->m_plen, &pend) ||
750 		     (pend >> sbi->blkszbits) >= BIT_ULL(48)))
751 		return -EFSCORRUPTED;
752 	return 0;
753 }
754 
755 int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
756 			    int flags)
757 {
758 	struct erofs_inode *const vi = EROFS_I(inode);
759 	int err = 0;
760 
761 	trace_erofs_map_blocks_enter(inode, map, flags);
762 	if (map->m_la >= inode->i_size) {	/* post-EOF unmapped extent */
763 		map->m_llen = map->m_la + 1 - inode->i_size;
764 		map->m_la = inode->i_size;
765 		map->m_flags = 0;
766 	} else {
767 		err = z_erofs_fill_inode(inode, map);
768 		if (!err) {
769 			if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL &&
770 			    (vi->z_advise & Z_EROFS_ADVISE_EXTENTS))
771 				err = z_erofs_map_blocks_ext(inode, map, flags);
772 			else
773 				err = z_erofs_map_blocks_fo(inode, map, flags);
774 		}
775 		if (!err)
776 			err = z_erofs_map_sanity_check(inode, map);
777 		if (err)
778 			map->m_llen = 0;
779 	}
780 	trace_erofs_map_blocks_exit(inode, map, flags, err);
781 	return err;
782 }
783 
784 static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
785 				loff_t length, unsigned int flags,
786 				struct iomap *iomap, struct iomap *srcmap)
787 {
788 	int ret;
789 	struct erofs_map_blocks map = { .m_la = offset };
790 
791 	ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
792 	erofs_put_metabuf(&map.buf);
793 	if (ret < 0)
794 		return ret;
795 
796 	iomap->bdev = inode->i_sb->s_bdev;
797 	iomap->offset = map.m_la;
798 	iomap->length = map.m_llen;
799 	if (map.m_flags & EROFS_MAP_FRAGMENT) {
800 		iomap->type = IOMAP_MAPPED;
801 		iomap->addr = IOMAP_NULL_ADDR;
802 	} else if (map.m_flags & EROFS_MAP_MAPPED) {
803 		iomap->type = IOMAP_MAPPED;
804 		iomap->addr = map.m_pa;
805 	} else {
806 		iomap->type = IOMAP_HOLE;
807 		iomap->addr = IOMAP_NULL_ADDR;
808 		/*
809 		 * No strict rule on how to describe extents for post EOF, yet
810 		 * we need to do like below. Otherwise, iomap itself will get
811 		 * into an endless loop on post EOF.
812 		 *
813 		 * Calculate the effective offset by subtracting extent start
814 		 * (map.m_la) from the requested offset, and add it to length.
815 		 * (NB: offset >= map.m_la always)
816 		 */
817 		if (iomap->offset >= inode->i_size)
818 			iomap->length = length + offset - map.m_la;
819 	}
820 	iomap->flags = 0;
821 	return 0;
822 }
823 
/*
 * iomap operations for reporting the extents of compressed inodes; only
 * ->iomap_begin is provided, which maps with EROFS_GET_BLOCKS_FIEMAP.
 */
const struct iomap_ops z_erofs_iomap_report_ops = {
	.iomap_begin = z_erofs_iomap_begin_report,
};
826 };
827