xref: /linux/fs/erofs/zmap.c (revision 9e4e86a604dfd06402933467578c4b79f5412b2c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2018-2019 HUAWEI, Inc.
4  *             https://www.huawei.com/
5  */
6 #include "internal.h"
7 #include <linux/unaligned.h>
8 #include <trace/events/erofs.h>
9 
/*
 * Scratch state gathered while mapping one logical extent of a compressed
 * inode from its on-disk lcluster indexes.
 */
struct z_erofs_maprecorder {
	struct inode *inode;
	struct erofs_map_blocks *map;	/* mapping output; also holds the metabuf */
	u64 lcn;			/* logical cluster number just loaded */
	/* compression extent information gathered */
	u8  type, headtype;		/* current lcluster type / its HEAD's type */
	u16 clusterofs;			/* decoded clusterofs (or 1 << lclusterbits for NONHEAD) */
	u16 delta[2];			/* NONHEAD lookback (d0) / lookahead (d1) distances */
	erofs_blk_t pblk, compressedblks;
	erofs_off_t nextpackoff;	/* on-disk offset right after the current index pack */
	bool partialref, in_mbox;	/* in_mbox: inode metadata lives in the metabox */
};
22 
/*
 * Load the on-disk "full" lcluster index (struct z_erofs_lcluster_index)
 * for logical cluster @lcn and decode it into @m.
 *
 * NONHEAD lclusters carry lookback/lookahead deltas; if delta[0] has
 * Z_EROFS_LI_D0_CBLKCNT set, it instead encodes the compressed block count
 * of a big pcluster.  PLAIN/HEAD lclusters carry the cluster offset and
 * the physical block address.
 *
 * Returns 0 on success or a negative errno on failure.
 */
static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, u64 lcn)
{
	struct inode *const inode = m->inode;
	struct erofs_inode *const vi = EROFS_I(inode);
	/* byte position of the lcn-th index, right after the inode metadata */
	const erofs_off_t pos = Z_EROFS_FULL_INDEX_START(erofs_iloc(inode) +
			vi->inode_isize + vi->xattr_isize) +
			lcn * sizeof(struct z_erofs_lcluster_index);
	struct z_erofs_lcluster_index *di;
	unsigned int advise;

	di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox);
	if (IS_ERR(di))
		return PTR_ERR(di);
	m->lcn = lcn;
	m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index);

	advise = le16_to_cpu(di->di_advise);
	m->type = advise & Z_EROFS_LI_LCLUSTER_TYPE_MASK;
	if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
		/* NONHEAD has no valid clusterofs; use an out-of-range sentinel */
		m->clusterofs = 1 << vi->z_lclusterbits;
		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
		if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
			/* CBLKCNT encoding requires a big-pcluster feature bit */
			if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
					Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
				DBG_BUGON(1);
				return -EFSCORRUPTED;
			}
			m->compressedblks = m->delta[0] & ~Z_EROFS_LI_D0_CBLKCNT;
			m->delta[0] = 1;
		}
		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
	} else {
		m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF);
		m->clusterofs = le16_to_cpu(di->di_clusterofs);
		m->pblk = le32_to_cpu(di->di_u.blkaddr);
	}
	return 0;
}
61 
/*
 * Decode one packed lcluster field at bit position @pos of @in: the low
 * @lobits bits form the "lo" value (returned) and the following two bits
 * form the lcluster type (stored via @type).
 */
static unsigned int decode_compactedbits(unsigned int lobits,
					 u8 *in, unsigned int pos, u8 *type)
{
	unsigned int raw, lomask;

	/* unaligned 32-bit window shifted down to the field's bit offset */
	raw = get_unaligned_le32(in + pos / 8) >> (pos & 7);
	lomask = (1 << lobits) - 1;
	*type = (raw >> lobits) & 3;
	return raw & lomask;
}
71 
/*
 * Compute the lookahead distance (delta[1]) for the NONHEAD lcluster at
 * index @i of a compacted pack: the number of following lclusters up to
 * the next non-NONHEAD one, extended by the last entry's "lo" when the
 * run reaches the end of the pack (unless it is a CBLKCNT marker).
 */
static int get_compacted_la_distance(unsigned int lobits,
				     unsigned int encodebits,
				     unsigned int vcnt, u8 *in, int i)
{
	unsigned int last_lo = 0, dist = 0;
	u8 type;

	DBG_BUGON(i >= vcnt);

	for (;;) {
		last_lo = decode_compactedbits(lobits, in, encodebits * i, &type);
		if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
			return dist;
		++dist;
		if (++i >= vcnt)
			break;
	}

	/* vcnt - 1 (Z_EROFS_LCLUSTER_TYPE_NONHEAD) item */
	if (!(last_lo & Z_EROFS_LI_D0_CBLKCNT))
		dist += last_lo - 1;
	return dist;
}
94 
/*
 * Load the lcluster index for @lcn from the "compact" on-disk format and
 * decode it into @m.
 *
 * Compact indexes pack vcnt (2 or 16) lclusters plus one trailing __le32
 * base block address into each (vcnt << amortizedshift)-byte unit, so the
 * physical block address of a PLAIN/HEAD lcluster must be reconstructed by
 * walking backwards over the preceding entries of the same unit.
 */
static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
					 u64 lcn, bool lookahead)
{
	struct inode *const inode = m->inode;
	struct erofs_inode *const vi = EROFS_I(inode);
	const erofs_off_t ebase = Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) +
			vi->inode_isize + vi->xattr_isize);
	const unsigned int lclusterbits = vi->z_lclusterbits;
	const unsigned int totalidx = erofs_iblks(inode);
	unsigned int compacted_4b_initial, compacted_2b, amortizedshift;
	unsigned int vcnt, lo, lobits, encodebits, nblk, bytes;
	bool big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
	erofs_off_t pos;
	u8 *in, type;
	int i;

	if (lcn >= totalidx || lclusterbits > 14)
		return -EINVAL;

	m->lcn = lcn;
	/* used to align to 32-byte (compacted_2b) alignment */
	compacted_4b_initial = ((32 - ebase % 32) / 4) & 7;
	compacted_2b = 0;
	if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
	    compacted_4b_initial < totalidx)
		compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);

	/* locate the byte position of the compacted entry holding @lcn */
	pos = ebase;
	amortizedshift = 2;	/* compact_4b */
	if (lcn >= compacted_4b_initial) {
		pos += compacted_4b_initial * 4;
		lcn -= compacted_4b_initial;
		if (lcn < compacted_2b) {
			amortizedshift = 1;
		} else {
			pos += compacted_2b * 2;
			lcn -= compacted_2b;
		}
	}
	pos += lcn * (1 << amortizedshift);

	/* figure out the lcluster count in this pack */
	if (1 << amortizedshift == 4 && lclusterbits <= 14)
		vcnt = 2;
	else if (1 << amortizedshift == 2 && lclusterbits <= 12)
		vcnt = 16;
	else
		return -EOPNOTSUPP;

	in = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox);
	if (IS_ERR(in))
		return PTR_ERR(in);

	/* it doesn't equal to round_up(..) */
	m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
			 (vcnt << amortizedshift);
	lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U);
	encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
	/* rewind @in to the start of this pack; @i is the entry index in it */
	bytes = pos & ((vcnt << amortizedshift) - 1);
	in -= bytes;
	i = bytes >> amortizedshift;

	lo = decode_compactedbits(lobits, in, encodebits * i, &type);
	m->type = type;
	if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
		m->clusterofs = 1 << lclusterbits;

		/* figure out lookahead_distance: delta[1] if needed */
		if (lookahead)
			m->delta[1] = get_compacted_la_distance(lobits,
						encodebits, vcnt, in, i);
		if (lo & Z_EROFS_LI_D0_CBLKCNT) {
			/* CBLKCNT entries are only valid with big pclusters */
			if (!big_pcluster) {
				DBG_BUGON(1);
				return -EFSCORRUPTED;
			}
			m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT;
			m->delta[0] = 1;
			return 0;
		} else if (i + 1 != (int)vcnt) {
			m->delta[0] = lo;
			return 0;
		}
		/*
		 * since the last lcluster in the pack is special,
		 * of which lo saves delta[1] rather than delta[0].
		 * Hence, get delta[0] by the previous lcluster indirectly.
		 */
		lo = decode_compactedbits(lobits, in,
					  encodebits * (i - 1), &type);
		if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
			lo = 0;
		else if (lo & Z_EROFS_LI_D0_CBLKCNT)
			lo = 1;
		m->delta[0] = lo + 1;
		return 0;
	}
	m->clusterofs = lo;
	m->delta[0] = 0;
	/* figout out blkaddr (pblk) for HEAD lclusters */
	if (!big_pcluster) {
		/* every preceding non-looked-back entry contributes one block */
		nblk = 1;
		while (i > 0) {
			--i;
			lo = decode_compactedbits(lobits, in,
						  encodebits * i, &type);
			if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD)
				i -= lo;

			if (i >= 0)
				++nblk;
		}
	} else {
		/* big pcluster: sum up CBLKCNT markers instead */
		nblk = 0;
		while (i > 0) {
			--i;
			lo = decode_compactedbits(lobits, in,
						  encodebits * i, &type);
			if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
				if (lo & Z_EROFS_LI_D0_CBLKCNT) {
					--i;
					nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT;
					continue;
				}
				/* bigpcluster shouldn't have plain d0 == 1 */
				if (lo <= 1) {
					DBG_BUGON(1);
					return -EFSCORRUPTED;
				}
				i -= lo - 2;
				continue;
			}
			++nblk;
		}
	}
	/* the trailing __le32 of the pack is the base block address */
	in += (vcnt << amortizedshift) - sizeof(__le32);
	m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
	return 0;
}
234 
/*
 * Load and validate the lcluster index for @lcn, dispatching to the
 * compact or full on-disk format based on the inode's datalayout.
 */
static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
					   u64 lcn, bool lookahead)
{
	struct erofs_inode *vi = EROFS_I(m->inode);
	int err;

	if (vi->datalayout != EROFS_INODE_COMPRESSED_COMPACT) {
		DBG_BUGON(vi->datalayout != EROFS_INODE_COMPRESSED_FULL);
		err = z_erofs_load_full_lcluster(m, lcn);
	} else {
		err = z_erofs_load_compact_lcluster(m, lcn, lookahead);
	}
	if (err)
		return err;

	/* reject lcluster types this kernel doesn't know about */
	if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) {
		erofs_err(m->inode->i_sb, "unknown type %u @ lcn %llu of nid %llu",
			  m->type, lcn, EROFS_I(m->inode)->nid);
		DBG_BUGON(1);
		return -EOPNOTSUPP;
	}
	/* PLAIN/HEAD clusterofs must lie inside a single lcluster */
	if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD &&
	    m->clusterofs >= (1 << vi->z_lclusterbits)) {
		DBG_BUGON(1);
		return -EFSCORRUPTED;
	}
	return 0;
}
262 
z_erofs_extent_lookback(struct z_erofs_maprecorder * m,unsigned int lookback_distance)263 static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
264 				   unsigned int lookback_distance)
265 {
266 	struct super_block *sb = m->inode->i_sb;
267 	struct erofs_inode *const vi = EROFS_I(m->inode);
268 	const unsigned int lclusterbits = vi->z_lclusterbits;
269 
270 	while (m->lcn >= lookback_distance) {
271 		u64 lcn = m->lcn - lookback_distance;
272 		int err;
273 
274 		if (!lookback_distance)
275 			break;
276 
277 		err = z_erofs_load_lcluster_from_disk(m, lcn, false);
278 		if (err)
279 			return err;
280 		if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
281 			lookback_distance = m->delta[0];
282 			continue;
283 		}
284 		m->headtype = m->type;
285 		m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
286 		return 0;
287 	}
288 	erofs_err(sb, "bogus lookback distance %u @ lcn %llu of nid %llu",
289 		  lookback_distance, m->lcn, vi->nid);
290 	DBG_BUGON(1);
291 	return -EFSCORRUPTED;
292 }
293 
/*
 * Determine the compressed (physical) length of the pcluster whose HEAD
 * lcluster is recorded in @m, storing the result in map->m_plen.
 *
 * For big pclusters, the block count comes from the CBLKCNT carried by the
 * 1st NONHEAD lcluster following the HEAD; otherwise the pcluster is
 * exactly one block.
 */
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
					    unsigned int initial_lcn)
{
	struct inode *inode = m->inode;
	struct super_block *sb = inode->i_sb;
	struct erofs_inode *vi = EROFS_I(inode);
	bool bigpcl1 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
	bool bigpcl2 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2;
	u64 lcn = m->lcn + 1;
	int err;

	DBG_BUGON(m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
	DBG_BUGON(m->type != m->headtype);

	/* single-block pcluster: no big pcluster for this head type, or EOF */
	if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) ||
	    ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
	      m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) ||
	    (lcn << vi->z_lclusterbits) >= inode->i_size)
		m->compressedblks = 1;

	/* already known, e.g. a CBLKCNT decoded while loading the lcluster */
	if (m->compressedblks)
		goto out;

	err = z_erofs_load_lcluster_from_disk(m, lcn, false);
	if (err)
		return err;

	/*
	 * If the 1st NONHEAD lcluster has already been handled initially w/o
	 * valid compressedblks, which means at least it mustn't be CBLKCNT, or
	 * an internal implemenatation error is detected.
	 *
	 * The following code can also handle it properly anyway, but let's
	 * BUG_ON in the debugging mode only for developers to notice that.
	 */
	DBG_BUGON(lcn == initial_lcn &&
		  m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);

	/* a NONHEAD here must be a CBLKCNT entry, i.e. delta[0] == 1 */
	if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD && m->delta[0] != 1) {
		erofs_err(sb, "bogus CBLKCNT @ lcn %llu of nid %llu", lcn, vi->nid);
		DBG_BUGON(1);
		return -EFSCORRUPTED;
	}

	/*
	 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type rather
	 * than CBLKCNT, it's a 1 block-sized pcluster.
	 */
	if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD || !m->compressedblks)
		m->compressedblks = 1;
out:
	m->map->m_plen = erofs_pos(sb, m->compressedblks);
	return 0;
}
348 
/*
 * Walk forward from the current lcluster to extend map->m_llen to the full
 * decompressed length of the extent starting at map->m_la, stopping at the
 * next PLAIN/HEAD lcluster or at EOF.
 */
static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
{
	struct inode *inode = m->inode;
	struct erofs_inode *vi = EROFS_I(inode);
	struct erofs_map_blocks *map = m->map;
	unsigned int lclusterbits = vi->z_lclusterbits;
	u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
	int err;

	while (1) {
		/* handle the last EOF pcluster (no next HEAD lcluster) */
		if ((lcn << lclusterbits) >= inode->i_size) {
			map->m_llen = inode->i_size - map->m_la;
			return 0;
		}

		/* lookahead enabled so NONHEADs come back with delta[1] set */
		err = z_erofs_load_lcluster_from_disk(m, lcn, true);
		if (err)
			return err;

		if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
			/* work around invalid d1 generated by pre-1.0 mkfs */
			if (unlikely(!m->delta[1])) {
				m->delta[1] = 1;
				DBG_BUGON(1);
			}
		} else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) {
			if (lcn != headlcn)
				break;	/* ends at the next HEAD lcluster */
			/* the extent's own HEAD: just advance one lcluster */
			m->delta[1] = 1;
		}
		lcn += m->delta[1];
	}
	map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
	return 0;
}
385 
/*
 * Map a logical offset of a compressed inode via the full/compact lcluster
 * index formats (the non-EXTENTS layouts).
 *
 * With EROFS_GET_BLOCKS_FINDTAIL, the lookup is performed at the file tail
 * instead of map->m_la in order to record the tail extent location
 * (ztailpacking inline data or fragment) into the erofs_inode.
 */
static int z_erofs_map_blocks_fo(struct inode *inode,
				 struct erofs_map_blocks *map, int flags)
{
	struct erofs_inode *vi = EROFS_I(inode);
	struct super_block *sb = inode->i_sb;
	bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
	bool ztailpacking = vi->z_idata_size;
	unsigned int lclusterbits = vi->z_lclusterbits;
	struct z_erofs_maprecorder m = {
		.inode = inode,
		.map = map,
		.in_mbox = erofs_inode_in_metabox(inode),
	};
	unsigned int endoff;
	unsigned long initial_lcn;
	unsigned long long ofs, end;
	int err;

	ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
	/* z_tailextent_headlcn == 0 here means the whole file is the fragment */
	if (fragment && !(flags & EROFS_GET_BLOCKS_FINDTAIL) &&
	    !vi->z_tailextent_headlcn) {
		map->m_la = 0;
		map->m_llen = inode->i_size;
		map->m_flags = EROFS_MAP_FRAGMENT;
		return 0;
	}
	initial_lcn = ofs >> lclusterbits;
	endoff = ofs & ((1 << lclusterbits) - 1);

	err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false);
	if (err)
		goto unmap_out;

	/* inline (ztailpacking) data follows the last on-disk index */
	if ((flags & EROFS_GET_BLOCKS_FINDTAIL) && ztailpacking)
		vi->z_fragmentoff = m.nextpackoff;
	map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_PARTIAL_MAPPED;
	end = (m.lcn + 1ULL) << lclusterbits;

	if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && endoff >= m.clusterofs) {
		m.headtype = m.type;
		map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
		/*
		 * For ztailpacking files, in order to inline data more
		 * effectively, special EOF lclusters are now supported
		 * which can have three parts at most.
		 */
		if (ztailpacking && end > inode->i_size)
			end = inode->i_size;
	} else {
		if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
			/* this HEAD starts after @ofs: the extent ends at it */
			end = (m.lcn << lclusterbits) | m.clusterofs;
			map->m_flags &= ~EROFS_MAP_PARTIAL_MAPPED;
			m.delta[0] = 1;
		}
		/* get the corresponding first chunk */
		err = z_erofs_extent_lookback(&m, m.delta[0]);
		if (err)
			goto unmap_out;
	}
	if (m.partialref)
		map->m_flags |= EROFS_MAP_PARTIAL_REF;
	map->m_llen = end - map->m_la;

	if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
		vi->z_tailextent_headlcn = m.lcn;
		/* for non-compact indexes, fragmentoff is 64 bits */
		if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
			vi->z_fragmentoff |= (u64)m.pblk << 32;
	}
	if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
		/* tail extent inlined in metadata; must fit one block */
		map->m_flags |= EROFS_MAP_META;
		map->m_pa = vi->z_fragmentoff;
		map->m_plen = vi->z_idata_size;
		if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
			erofs_err(sb, "ztailpacking inline data across blocks @ nid %llu",
				  vi->nid);
			err = -EFSCORRUPTED;
			goto unmap_out;
		}
	} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
		map->m_flags = EROFS_MAP_FRAGMENT;
	} else {
		map->m_pa = erofs_pos(sb, m.pblk);
		err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
		if (err)
			goto unmap_out;
	}

	if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) {
		if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
			map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED;
		else
			map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
	} else if (m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
		map->m_algorithmformat = vi->z_algorithmtype[1];
	} else {
		map->m_algorithmformat = vi->z_algorithmtype[0];
	}

	/* extend m_llen to the full extent length when callers need it */
	if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
	    ((flags & EROFS_GET_BLOCKS_READMORE) &&
	     (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA ||
	      map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE ||
	      map->m_algorithmformat == Z_EROFS_COMPRESSION_ZSTD) &&
	      map->m_llen >= i_blocksize(inode))) {
		err = z_erofs_get_extent_decompressedlen(&m);
		if (!err)
			map->m_flags &= ~EROFS_MAP_PARTIAL_MAPPED;
	}

unmap_out:
	erofs_unmap_metabuf(&m.map->buf);
	return err;
}
500 
/*
 * Map a logical offset using the EXTENTS format (struct z_erofs_extent
 * records).  Small record sizes (without pstart_hi) keep one record per
 * lcluster and are scanned linearly; larger records carry explicit lstart
 * fields and are binary-searched.
 */
static int z_erofs_map_blocks_ext(struct inode *inode,
				  struct erofs_map_blocks *map, int flags)
{
	struct erofs_inode *vi = EROFS_I(inode);
	struct super_block *sb = inode->i_sb;
	bool interlaced = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
	unsigned int recsz = z_erofs_extent_recsize(vi->z_advise);
	erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) +
				   vi->inode_isize + vi->xattr_isize), recsz);
	unsigned int bmask = sb->s_blocksize - 1;
	bool in_mbox = erofs_inode_in_metabox(inode);
	erofs_off_t lend = inode->i_size;
	erofs_off_t l, r, mid, pa, la, lstart;
	struct z_erofs_extent *ext;
	unsigned int fmt;
	bool last;

	map->m_flags = 0;
	if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) {
		if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) {
			/* no per-record pstart: an 8-byte base address leads */
			ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox);
			if (IS_ERR(ext))
				return PTR_ERR(ext);
			pa = le64_to_cpu(*(__le64 *)ext);
			pos += sizeof(__le64);
			lstart = 0;
		} else {
			/* records hold pstart_lo: jump straight to the target */
			lstart = round_down(map->m_la, 1 << vi->z_lclusterbits);
			pos += (lstart >> vi->z_lclusterbits) * recsz;
			pa = EROFS_NULL_ADDR;
		}

		/* one record per lcluster: walk up to and past map->m_la */
		for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) {
			ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox);
			if (IS_ERR(ext))
				return PTR_ERR(ext);
			map->m_plen = le32_to_cpu(ext->plen);
			if (pa != EROFS_NULL_ADDR) {
				/* accumulate the implicit physical address */
				map->m_pa = pa;
				pa += map->m_plen & Z_EROFS_EXTENT_PLEN_MASK;
			} else {
				map->m_pa = le32_to_cpu(ext->pstart_lo);
			}
			pos += recsz;
		}
		last = (lstart >= round_up(lend, 1 << vi->z_lclusterbits));
		lend = min(lstart, lend);
		lstart -= 1 << vi->z_lclusterbits;
	} else {
		/* binary search for the record covering map->m_la */
		lstart = lend;
		for (l = 0, r = vi->z_extents; l < r; ) {
			mid = l + (r - l) / 2;
			ext = erofs_read_metabuf(&map->buf, sb,
						 pos + mid * recsz, in_mbox);
			if (IS_ERR(ext))
				return PTR_ERR(ext);

			la = le32_to_cpu(ext->lstart_lo);
			pa = le32_to_cpu(ext->pstart_lo) |
				(u64)le32_to_cpu(ext->pstart_hi) << 32;
			if (recsz > offsetof(struct z_erofs_extent, lstart_hi))
				la |= (u64)le32_to_cpu(ext->lstart_hi) << 32;

			if (la > map->m_la) {
				r = mid;
				/* lstart fields must never exceed the file end */
				if (la > lend) {
					DBG_BUGON(1);
					return -EFSCORRUPTED;
				}
				lend = la;
			} else {
				l = mid + 1;
				if (map->m_la == la)
					r = min(l + 1, r);
				lstart = la;
				map->m_plen = le32_to_cpu(ext->plen);
				map->m_pa = pa;
			}
		}
		last = (l >= vi->z_extents);
	}

	if (lstart < lend) {
		map->m_la = lstart;
		if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
			/* last extent of a fragment file: record fragmentoff */
			map->m_flags = EROFS_MAP_FRAGMENT;
			vi->z_fragmentoff = map->m_plen;
			if (recsz > offsetof(struct z_erofs_extent, pstart_lo))
				vi->z_fragmentoff |= map->m_pa << 32;
		} else if (map->m_plen & Z_EROFS_EXTENT_PLEN_MASK) {
			map->m_flags |= EROFS_MAP_MAPPED;
			fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT;
			if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL)
				map->m_flags |= EROFS_MAP_PARTIAL_REF;
			map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK;
			if (fmt)
				map->m_algorithmformat = fmt - 1;
			else if (interlaced && !((map->m_pa | map->m_plen) & bmask))
				map->m_algorithmformat =
					Z_EROFS_COMPRESSION_INTERLACED;
			else
				map->m_algorithmformat =
					Z_EROFS_COMPRESSION_SHIFTED;
		}
	}
	map->m_llen = lend - map->m_la;
	return 0;
}
609 
/*
 * Parse the on-disk z_erofs map header once per inode and cache the result
 * in the erofs_inode.  Concurrent initializers are serialized via
 * EROFS_I_BL_Z_BIT; the EROFS_I_Z_INITED_BIT / smp_mb() pair publishes the
 * parsed fields to lock-free readers.
 */
static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map)
{
	struct erofs_inode *const vi = EROFS_I(inode);
	struct super_block *const sb = inode->i_sb;
	struct z_erofs_map_header *h;
	erofs_off_t pos;
	int err = 0;

	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
		/*
		 * paired with smp_mb() at the end of the function to ensure
		 * fields will only be observed after the bit is set.
		 */
		smp_mb();
		return 0;
	}

	if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
		return -ERESTARTSYS;

	/* another initializer may have finished while we slept on the lock */
	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
		goto out_unlock;

	/* map header sits 8-byte-aligned right after the inode + xattrs */
	pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
	h = erofs_read_metabuf(&map->buf, sb, pos, erofs_inode_in_metabox(inode));
	if (IS_ERR(h)) {
		err = PTR_ERR(h);
		goto out_unlock;
	}

	/*
	 * if the highest bit of the 8-byte map header is set, the whole file
	 * is stored in the packed inode. The rest bits keeps z_fragmentoff.
	 */
	if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
		vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
		vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
		vi->z_tailextent_headlcn = 0;
		goto done;
	}
	vi->z_advise = le16_to_cpu(h->h_advise);
	vi->z_lclusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 15);
	/* EXTENTS-format inodes only need the extent count; nothing more */
	if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL &&
	    (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) {
		vi->z_extents = le32_to_cpu(h->h_extents_lo) |
			((u64)le16_to_cpu(h->h_extents_hi) << 32);
		goto done;
	}

	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
	if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
		vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
	else if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER)
		vi->z_idata_size = le16_to_cpu(h->h_idata_size);

	/* per-inode big pcluster needs the superblock feature bit too */
	if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
	    vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
			    Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
		erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
			  vi->nid);
		err = -EFSCORRUPTED;
		goto out_unlock;
	}
	if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT &&
	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
		erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
			  vi->nid);
		err = -EFSCORRUPTED;
		goto out_unlock;
	}

	/* locate the tail extent now for inline/fragment inodes */
	if (vi->z_idata_size ||
	    (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
		struct erofs_map_blocks tm = {
			.buf = __EROFS_BUF_INITIALIZER
		};

		err = z_erofs_map_blocks_fo(inode, &tm,
					    EROFS_GET_BLOCKS_FINDTAIL);
		erofs_put_metabuf(&tm.buf);
		if (err < 0)
			goto out_unlock;
	}
done:
	/* paired with smp_mb() at the beginning of the function */
	smp_mb();
	set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
out_unlock:
	clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
	return err;
}
703 
z_erofs_map_sanity_check(struct inode * inode,struct erofs_map_blocks * map)704 static int z_erofs_map_sanity_check(struct inode *inode,
705 				    struct erofs_map_blocks *map)
706 {
707 	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
708 	u64 pend;
709 
710 	if (!(map->m_flags & EROFS_MAP_MAPPED))
711 		return 0;
712 	if (unlikely(map->m_algorithmformat >= Z_EROFS_COMPRESSION_RUNTIME_MAX)) {
713 		erofs_err(inode->i_sb, "unknown algorithm %d @ pos %llu for nid %llu, please upgrade kernel",
714 			  map->m_algorithmformat, map->m_la, EROFS_I(inode)->nid);
715 		return -EOPNOTSUPP;
716 	}
717 
718 	if (map->m_algorithmformat < Z_EROFS_COMPRESSION_MAX) {
719 		if (sbi->available_compr_algs ^ BIT(map->m_algorithmformat)) {
720 			erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu",
721 				  map->m_algorithmformat, EROFS_I(inode)->nid);
722 			return -EFSCORRUPTED;
723 		}
724 		if (EROFS_MAP_FULL(map->m_flags) && map->m_llen < map->m_plen) {
725 			erofs_err(inode->i_sb, "too much compressed data @ la %llu of nid %llu",
726 				  map->m_la, EROFS_I(inode)->nid);
727 			return -EFSCORRUPTED;
728 		}
729 	} else if (map->m_llen > map->m_plen) {
730 		erofs_err(inode->i_sb, "not enough plain data on disk @ la %llu of nid %llu",
731 			  map->m_la, EROFS_I(inode)->nid);
732 		return -EFSCORRUPTED;
733 	}
734 	if (unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE ||
735 		     map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE))
736 		return -EOPNOTSUPP;
737 	/* Filesystems beyond 48-bit physical block addresses are invalid */
738 	if (unlikely(check_add_overflow(map->m_pa, map->m_plen, &pend) ||
739 		     (pend >> sbi->blkszbits) >= BIT_ULL(48)))
740 		return -EFSCORRUPTED;
741 	return 0;
742 }
743 
z_erofs_map_blocks_iter(struct inode * inode,struct erofs_map_blocks * map,int flags)744 int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
745 			    int flags)
746 {
747 	struct erofs_inode *const vi = EROFS_I(inode);
748 	int err = 0;
749 
750 	trace_erofs_map_blocks_enter(inode, map, flags);
751 	if (map->m_la >= inode->i_size) {	/* post-EOF unmapped extent */
752 		map->m_llen = map->m_la + 1 - inode->i_size;
753 		map->m_la = inode->i_size;
754 		map->m_flags = 0;
755 	} else {
756 		err = z_erofs_fill_inode(inode, map);
757 		if (!err) {
758 			if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL &&
759 			    (vi->z_advise & Z_EROFS_ADVISE_EXTENTS))
760 				err = z_erofs_map_blocks_ext(inode, map, flags);
761 			else
762 				err = z_erofs_map_blocks_fo(inode, map, flags);
763 		}
764 		if (!err)
765 			err = z_erofs_map_sanity_check(inode, map);
766 		if (err)
767 			map->m_llen = 0;
768 	}
769 	trace_erofs_map_blocks_exit(inode, map, flags, err);
770 	return err;
771 }
772 
/*
 * iomap_begin callback used for reporting (FIEMAP-style) queries only:
 * translate an erofs mapping into a struct iomap without doing any I/O.
 */
static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
				loff_t length, unsigned int flags,
				struct iomap *iomap, struct iomap *srcmap)
{
	struct erofs_map_blocks map = { .m_la = offset };
	int ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);

	erofs_put_metabuf(&map.buf);
	if (ret < 0)
		return ret;

	iomap->bdev = inode->i_sb->s_bdev;
	iomap->offset = map.m_la;
	iomap->length = map.m_llen;
	iomap->flags = 0;
	if (map.m_flags & (EROFS_MAP_FRAGMENT | EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_MAPPED;
		/* fragments have no standalone physical address to report */
		iomap->addr = (map.m_flags & EROFS_MAP_FRAGMENT) ?
				IOMAP_NULL_ADDR : map.m_pa;
	} else {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		/*
		 * No strict rule on how to describe extents for post EOF, yet
		 * we need to do like below. Otherwise, iomap itself will get
		 * into an endless loop on post EOF.
		 *
		 * Calculate the effective offset by subtracting extent start
		 * (map.m_la) from the requested offset, and add it to length.
		 * (NB: offset >= map.m_la always)
		 */
		if (iomap->offset >= inode->i_size)
			iomap->length = length + offset - map.m_la;
	}
	return 0;
}
812 
/* report-only iomap ops; lookups use EROFS_GET_BLOCKS_FIEMAP internally */
const struct iomap_ops z_erofs_iomap_report_ops = {
	.iomap_begin = z_erofs_iomap_begin_report,
};
816