// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018-2019 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"
#include <linux/unaligned.h>
#include <trace/events/erofs.h>

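/*
 * Per-query cursor used while walking the on-disk lcluster indexes:
 * it records the fields decoded from the lcluster most recently loaded
 * (type, cluster offset, deltas, physical block) for the mapping code below.
 */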
struct z_erofs_maprecorder {
	struct inode *inode;
	struct erofs_map_blocks *map;
	u64 lcn;
	/* compression extent information gathered */
	u8 type, headtype;
	u16 clusterofs;
	u16 delta[2];
	erofs_blk_t pblk, compressedblks;
	erofs_off_t nextpackoff;
	bool partialref, in_mbox;
};

static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, u64 lcn)
{
	struct inode *const inode = m->inode;
	struct erofs_inode *const vi = EROFS_I(inode);
	const erofs_off_t pos = Z_EROFS_FULL_INDEX_START(erofs_iloc(inode) +
			vi->inode_isize + vi->xattr_isize) +
			lcn * sizeof(struct z_erofs_lcluster_index);
	struct z_erofs_lcluster_index *di;
	unsigned int advise;

	di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox);
	if (IS_ERR(di))
		return PTR_ERR(di);
	m->lcn = lcn;
	m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index);

	advise = le16_to_cpu(di->di_advise);
	m->type = advise & Z_EROFS_LI_LCLUSTER_TYPE_MASK;
	if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
		m->clusterofs = 1 << vi->z_lclusterbits;
		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
		if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
			if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
					      Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
				DBG_BUGON(1);
				return -EFSCORRUPTED;
			}
			m->compressedblks = m->delta[0] & ~Z_EROFS_LI_D0_CBLKCNT;
			m->delta[0] = 1;
		}
		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
	} else {
		m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF);
		m->clusterofs = le16_to_cpu(di->di_clusterofs);
		m->pblk = le32_to_cpu(di->di_u.blkaddr);
	}
	return 0;
}

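/*
 * Decode one compacted lcluster field: the low @lobits bits at bit
 * offset @pos hold the "lo" value (clusterofs or delta), and the two
 * bits right above them hold the lcluster type.
 */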
static unsigned int decode_compactedbits(unsigned int lobits,
					 u8 *in, unsigned int pos, u8 *type)
{
	const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
	const unsigned int lo = v & ((1 << lobits) - 1);

	*type = (v >> lobits) & 3;
	return lo;
}

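/*
 * Estimate the lookahead distance (delta[1]) of the NONHEAD lcluster at
 * index @i by scanning forward within the current pack until a
 * non-NONHEAD lcluster is found.
 */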
static int get_compacted_la_distance(unsigned int lobits,
				     unsigned int encodebits,
				     unsigned int vcnt, u8 *in, int i)
{
	unsigned int lo, d1 = 0;
	u8 type;

	DBG_BUGON(i >= vcnt);

	do {
		lo = decode_compactedbits(lobits, in, encodebits * i, &type);

		if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
			return d1;
		++d1;
	} while (++i < vcnt);

	/* the last lcluster (vcnt - 1, NONHEAD) records delta[1] in its lo */
	if (!(lo & Z_EROFS_LI_D0_CBLKCNT))
		d1 += lo - 1;
	return d1;
}

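/*
 * Decode one lcluster from the compacted on-disk index: indexes are
 * grouped into fixed-size packs (2 lclusters for 4-byte records, 16
 * lclusters for 2-byte records), each pack ending with a __le32 base
 * blkaddr that HEAD lclusters in the pack are counted against.
 */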
static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
					 u64 lcn, bool lookahead)
{
	struct inode *const inode = m->inode;
	struct erofs_inode *const vi = EROFS_I(inode);
	const erofs_off_t ebase = Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) +
			vi->inode_isize + vi->xattr_isize);
	const unsigned int lclusterbits = vi->z_lclusterbits;
	const unsigned int totalidx = erofs_iblks(inode);
	unsigned int compacted_4b_initial, compacted_2b, amortizedshift;
	unsigned int vcnt, lo, lobits, encodebits, nblk, bytes;
	bool big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
	erofs_off_t pos;
	u8 *in, type;
	int i;

	if (lcn >= totalidx || lclusterbits > 14)
		return -EINVAL;

	m->lcn = lcn;
	/* number of initial 4-byte records needed to reach 32-byte (compacted_2b) alignment */
	compacted_4b_initial = ((32 - ebase % 32) / 4) & 7;
	compacted_2b = 0;
	if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
	    compacted_4b_initial < totalidx)
		compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);

	pos = ebase;
	amortizedshift = 2;	/* compact_4b */
	if (lcn >= compacted_4b_initial) {
		pos += compacted_4b_initial * 4;
		lcn -= compacted_4b_initial;
		if (lcn < compacted_2b) {
			amortizedshift = 1;
		} else {
			pos += compacted_2b * 2;
			lcn -= compacted_2b;
		}
	}
	pos += lcn * (1 << amortizedshift);

	/* figure out the lcluster count in this pack */
	if (1 << amortizedshift == 4 && lclusterbits <= 14)
		vcnt = 2;
	else if (1 << amortizedshift == 2 && lclusterbits <= 12)
		vcnt = 16;
	else
		return -EOPNOTSUPP;

	in = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, m->in_mbox);
	if (IS_ERR(in))
		return PTR_ERR(in);

	/* note that this doesn't equal round_up(..) */
	m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
			 (vcnt << amortizedshift);
	lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U);
	encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
	bytes = pos & ((vcnt << amortizedshift) - 1);
	in -= bytes;
	i = bytes >> amortizedshift;

	lo = decode_compactedbits(lobits, in, encodebits * i, &type);
	m->type = type;
	if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
		m->clusterofs = 1 << lclusterbits;

		/* figure out lookahead_distance: delta[1] if needed */
		if (lookahead)
			m->delta[1] = get_compacted_la_distance(lobits,
						encodebits, vcnt, in, i);
		if (lo & Z_EROFS_LI_D0_CBLKCNT) {
			if (!big_pcluster) {
				DBG_BUGON(1);
				return -EFSCORRUPTED;
			}
			m->compressedblks = lo & ~Z_EROFS_LI_D0_CBLKCNT;
			m->delta[0] = 1;
			return 0;
		} else if (i + 1 != (int)vcnt) {
			m->delta[0] = lo;
			return 0;
		}
		/*
		 * The last lcluster in the pack is special: its lo field
		 * stores delta[1] rather than delta[0], so derive delta[0]
		 * indirectly from the previous lcluster.
		 */
		lo = decode_compactedbits(lobits, in,
					  encodebits * (i - 1), &type);
		if (type != Z_EROFS_LCLUSTER_TYPE_NONHEAD)
			lo = 0;
		else if (lo & Z_EROFS_LI_D0_CBLKCNT)
			lo = 1;
		m->delta[0] = lo + 1;
		return 0;
	}
	m->clusterofs = lo;
	m->delta[0] = 0;
	/* figure out blkaddr (pblk) for HEAD lclusters */
	if (!big_pcluster) {
		nblk = 1;
		while (i > 0) {
			--i;
			lo = decode_compactedbits(lobits, in,
						  encodebits * i, &type);
			if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD)
				i -= lo;

			if (i >= 0)
				++nblk;
		}
	} else {
		nblk = 0;
		while (i > 0) {
			--i;
			lo = decode_compactedbits(lobits, in,
						  encodebits * i, &type);
			if (type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
				if (lo & Z_EROFS_LI_D0_CBLKCNT) {
					--i;
					nblk += lo & ~Z_EROFS_LI_D0_CBLKCNT;
					continue;
				}
				/* bigpcluster shouldn't have plain d0 == 1 */
				if (lo <= 1) {
					DBG_BUGON(1);
					return -EFSCORRUPTED;
				}
				i -= lo - 2;
				continue;
			}
			++nblk;
		}
	}
	in += (vcnt << amortizedshift) - sizeof(__le32);
	m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
	return 0;
}

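/* Load lcluster @lcn in the inode's on-disk index format and sanity-check the result */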
static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
					   u64 lcn, bool lookahead)
{
	struct erofs_inode *vi = EROFS_I(m->inode);
	int err;

	if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT) {
		err = z_erofs_load_compact_lcluster(m, lcn, lookahead);
	} else {
		DBG_BUGON(vi->datalayout != EROFS_INODE_COMPRESSED_FULL);
		err = z_erofs_load_full_lcluster(m, lcn);
	}
	if (err)
		return err;

	if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) {
		erofs_err(m->inode->i_sb, "unknown type %u @ lcn %llu of nid %llu",
			  m->type, lcn, EROFS_I(m->inode)->nid);
		DBG_BUGON(1);
		return -EOPNOTSUPP;
	} else if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD &&
		   m->clusterofs >= (1 << vi->z_lclusterbits)) {
		DBG_BUGON(1);
		return -EFSCORRUPTED;
	}
	return 0;
}

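/*
 * Walk backwards from the current lcluster by delta[0] hops until the
 * HEAD lcluster that starts the extent is reached, then record its
 * type and logical start in the map.
 */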
static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
				   unsigned int lookback_distance)
{
	struct super_block *sb = m->inode->i_sb;
	struct erofs_inode *const vi = EROFS_I(m->inode);
	const unsigned int lclusterbits = vi->z_lclusterbits;

	while (m->lcn >= lookback_distance) {
		u64 lcn = m->lcn - lookback_distance;
		int err;

		if (!lookback_distance)
			break;

		err = z_erofs_load_lcluster_from_disk(m, lcn, false);
		if (err)
			return err;
		if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
			lookback_distance = m->delta[0];
			continue;
		}
		m->headtype = m->type;
		m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
		return 0;
	}
	erofs_err(sb, "bogus lookback distance %u @ lcn %llu of nid %llu",
		  lookback_distance, m->lcn, vi->nid);
	DBG_BUGON(1);
	return -EFSCORRUPTED;
}

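/*
 * Work out m_plen for the current extent: one block unless the
 * corresponding big pcluster feature is enabled, in which case the
 * first NONHEAD lcluster after the HEAD records the compressed block
 * count (CBLKCNT).
 */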
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
					    unsigned int initial_lcn)
{
	struct inode *inode = m->inode;
	struct super_block *sb = inode->i_sb;
	struct erofs_inode *vi = EROFS_I(inode);
	bool bigpcl1 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
	bool bigpcl2 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2;
	u64 lcn = m->lcn + 1;
	int err;

	DBG_BUGON(m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
	DBG_BUGON(m->type != m->headtype);

	if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) ||
	    ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
	      m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) ||
	    (lcn << vi->z_lclusterbits) >= inode->i_size)
		m->compressedblks = 1;

	if (m->compressedblks)
		goto out;

	err = z_erofs_load_lcluster_from_disk(m, lcn, false);
	if (err)
		return err;

	/*
	 * If the 1st NONHEAD lcluster has already been handled initially
	 * without a valid compressedblks, it at least mustn't be a CBLKCNT
	 * lcluster; otherwise an internal implementation error has been
	 * detected.
	 *
	 * The code below can handle that case properly anyway, but BUG_ON
	 * in debugging mode so that developers notice it.
	 */
	DBG_BUGON(lcn == initial_lcn &&
		  m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);

	if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD && m->delta[0] != 1) {
		erofs_err(sb, "bogus CBLKCNT @ lcn %llu of nid %llu", lcn, vi->nid);
		DBG_BUGON(1);
		return -EFSCORRUPTED;
	}

	/*
	 * If the 1st NONHEAD lcluster is actually PLAIN or HEAD type rather
	 * than CBLKCNT, it's a one-block-sized pcluster.
	 */
	if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD || !m->compressedblks)
		m->compressedblks = 1;
out:
	m->map->m_plen = erofs_pos(sb, m->compressedblks);
	return 0;
}

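/*
 * Extend m_llen past the current lcluster by following delta[1]
 * (the lookahead distance) until the next HEAD lcluster or EOF is hit,
 * so that the full decompressed extent length is known.
 */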
static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
{
	struct inode *inode = m->inode;
	struct erofs_inode *vi = EROFS_I(inode);
	struct erofs_map_blocks *map = m->map;
	unsigned int lclusterbits = vi->z_lclusterbits;
	u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
	int err;

	while (1) {
		/* handle the last EOF pcluster (no next HEAD lcluster) */
		if ((lcn << lclusterbits) >= inode->i_size) {
			map->m_llen = inode->i_size - map->m_la;
			return 0;
		}

		err = z_erofs_load_lcluster_from_disk(m, lcn, true);
		if (err)
			return err;

		if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
			/* work around invalid d1 generated by pre-1.0 mkfs */
			if (unlikely(!m->delta[1])) {
				m->delta[1] = 1;
				DBG_BUGON(1);
			}
		} else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) {
			if (lcn != headlcn)
				break;	/* ends at the next HEAD lcluster */
			m->delta[1] = 1;
		}
		lcn += m->delta[1];
	}
	map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la;
	return 0;
}

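/*
 * Map a logical offset for inodes using the (non-extent) full or
 * compact lcluster index formats: load the lcluster covering the
 * offset, look back to its HEAD lcluster if necessary, then fill in
 * the physical extent; ztailpacking and fragment tails are handled
 * as special cases.
 */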
static int z_erofs_map_blocks_fo(struct inode *inode,
				 struct erofs_map_blocks *map, int flags)
{
	struct erofs_inode *vi = EROFS_I(inode);
	struct super_block *sb = inode->i_sb;
	bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
	bool ztailpacking = vi->z_idata_size;
	unsigned int lclusterbits = vi->z_lclusterbits;
	struct z_erofs_maprecorder m = {
		.inode = inode,
		.map = map,
		.in_mbox = erofs_inode_in_metabox(inode),
	};
	unsigned int endoff;
	unsigned long initial_lcn;
	unsigned long long ofs, end;
	int err;

	ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
	if (fragment && !(flags & EROFS_GET_BLOCKS_FINDTAIL) &&
	    !vi->z_tailextent_headlcn) {
		map->m_la = 0;
		map->m_llen = inode->i_size;
		map->m_flags = EROFS_MAP_FRAGMENT;
		return 0;
	}
	initial_lcn = ofs >> lclusterbits;
	endoff = ofs & ((1 << lclusterbits) - 1);

	err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false);
	if (err)
		goto unmap_out;

	if ((flags & EROFS_GET_BLOCKS_FINDTAIL) && ztailpacking)
		vi->z_fragmentoff = m.nextpackoff;
	map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_PARTIAL_MAPPED;
	end = (m.lcn + 1ULL) << lclusterbits;

	if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && endoff >= m.clusterofs) {
		m.headtype = m.type;
		map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
		/*
		 * For ztailpacking files, in order to inline data more
		 * effectively, special EOF lclusters are now supported
		 * which can have three parts at most.
		 */
		if (ztailpacking && end > inode->i_size)
			end = inode->i_size;
	} else {
		if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
			end = (m.lcn << lclusterbits) | m.clusterofs;
			map->m_flags &= ~EROFS_MAP_PARTIAL_MAPPED;
			m.delta[0] = 1;
		}
		/* get the corresponding first chunk */
		err = z_erofs_extent_lookback(&m, m.delta[0]);
		if (err)
			goto unmap_out;
	}
	if (m.partialref)
		map->m_flags |= EROFS_MAP_PARTIAL_REF;
	map->m_llen = end - map->m_la;

	if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
		vi->z_tailextent_headlcn = m.lcn;
		/* for non-compact indexes, fragmentoff is 64 bits */
		if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
			vi->z_fragmentoff |= (u64)m.pblk << 32;
	}
	if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
		map->m_flags |= EROFS_MAP_META;
		map->m_pa = vi->z_fragmentoff;
		map->m_plen = vi->z_idata_size;
		if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
			erofs_err(sb, "ztailpacking inline data across blocks @ nid %llu",
				  vi->nid);
			err = -EFSCORRUPTED;
			goto unmap_out;
		}
	} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
		map->m_flags = EROFS_MAP_FRAGMENT;
	} else {
		map->m_pa = erofs_pos(sb, m.pblk);
		err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
		if (err)
			goto unmap_out;
	}

	if (m.headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN) {
		if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
			map->m_algorithmformat = Z_EROFS_COMPRESSION_INTERLACED;
		else
			map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
	} else if (m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
		map->m_algorithmformat = vi->z_algorithmtype[1];
	} else {
		map->m_algorithmformat = vi->z_algorithmtype[0];
	}

	if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
	    ((flags & EROFS_GET_BLOCKS_READMORE) &&
	     (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA ||
	      map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE ||
	      map->m_algorithmformat == Z_EROFS_COMPRESSION_ZSTD) &&
	     map->m_llen >= i_blocksize(inode))) {
		err = z_erofs_get_extent_decompressedlen(&m);
		if (!err)
			map->m_flags &= ~EROFS_MAP_PARTIAL_MAPPED;
	}

unmap_out:
	erofs_unmap_metabuf(&m.map->buf);
	return err;
}

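/*
 * Map a logical offset for inodes using the extent-record format
 * (Z_EROFS_ADVISE_EXTENTS): records too small to carry an explicit
 * logical start describe one lcluster each and are walked linearly,
 * while larger records are located with a binary search over the
 * vi->z_extents entries.
 */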
static int z_erofs_map_blocks_ext(struct inode *inode,
				  struct erofs_map_blocks *map, int flags)
{
	struct erofs_inode *vi = EROFS_I(inode);
	struct super_block *sb = inode->i_sb;
	bool interlaced = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
	unsigned int recsz = z_erofs_extent_recsize(vi->z_advise);
	erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) +
			vi->inode_isize + vi->xattr_isize), recsz);
	unsigned int bmask = sb->s_blocksize - 1;
	bool in_mbox = erofs_inode_in_metabox(inode);
	erofs_off_t lend = inode->i_size;
	erofs_off_t l, r, mid, pa, la, lstart;
	struct z_erofs_extent *ext;
	unsigned int fmt;
	bool last;

	map->m_flags = 0;
	if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) {
		if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) {
			ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox);
			if (IS_ERR(ext))
				return PTR_ERR(ext);
			pa = le64_to_cpu(*(__le64 *)ext);
			pos += sizeof(__le64);
			lstart = 0;
		} else {
			lstart = round_down(map->m_la, 1 << vi->z_lclusterbits);
			pos += (lstart >> vi->z_lclusterbits) * recsz;
			pa = EROFS_NULL_ADDR;
		}

		for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) {
			ext = erofs_read_metabuf(&map->buf, sb, pos, in_mbox);
			if (IS_ERR(ext))
				return PTR_ERR(ext);
			map->m_plen = le32_to_cpu(ext->plen);
			if (pa != EROFS_NULL_ADDR) {
				map->m_pa = pa;
				pa += map->m_plen & Z_EROFS_EXTENT_PLEN_MASK;
			} else {
				map->m_pa = le32_to_cpu(ext->pstart_lo);
			}
			pos += recsz;
		}
		last = (lstart >= round_up(lend, 1 << vi->z_lclusterbits));
		lend = min(lstart, lend);
		lstart -= 1 << vi->z_lclusterbits;
	} else {
		lstart = lend;
		for (l = 0, r = vi->z_extents; l < r; ) {
			mid = l + (r - l) / 2;
			ext = erofs_read_metabuf(&map->buf, sb,
						 pos + mid * recsz, in_mbox);
			if (IS_ERR(ext))
				return PTR_ERR(ext);

			la = le32_to_cpu(ext->lstart_lo);
			pa = le32_to_cpu(ext->pstart_lo) |
				(u64)le32_to_cpu(ext->pstart_hi) << 32;
			if (recsz > offsetof(struct z_erofs_extent, lstart_hi))
				la |= (u64)le32_to_cpu(ext->lstart_hi) << 32;

			if (la > map->m_la) {
				r = mid;
				if (la > lend) {
					DBG_BUGON(1);
					return -EFSCORRUPTED;
				}
				lend = la;
			} else {
				l = mid + 1;
				if (map->m_la == la)
					r = min(l + 1, r);
				lstart = la;
				map->m_plen = le32_to_cpu(ext->plen);
				map->m_pa = pa;
			}
		}
		last = (l >= vi->z_extents);
	}

	if (lstart < lend) {
		map->m_la = lstart;
		if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
			map->m_flags = EROFS_MAP_FRAGMENT;
			vi->z_fragmentoff = map->m_plen;
			if (recsz > offsetof(struct z_erofs_extent, pstart_lo))
				vi->z_fragmentoff |= map->m_pa << 32;
		} else if (map->m_plen & Z_EROFS_EXTENT_PLEN_MASK) {
			map->m_flags |= EROFS_MAP_MAPPED;
			fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT;
			if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL)
				map->m_flags |= EROFS_MAP_PARTIAL_REF;
			map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK;
			if (fmt)
				map->m_algorithmformat = fmt - 1;
			else if (interlaced && !((map->m_pa | map->m_plen) & bmask))
				map->m_algorithmformat =
					Z_EROFS_COMPRESSION_INTERLACED;
			else
				map->m_algorithmformat =
					Z_EROFS_COMPRESSION_SHIFTED;
		}
	}
	map->m_llen = lend - map->m_la;
	return 0;
}

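/*
 * Lazily parse the z_erofs_map_header of an inode on the first mapping
 * request; EROFS_I_BL_Z_BIT serializes concurrent parsers and
 * EROFS_I_Z_INITED_BIT (paired with smp_mb()) publishes the result.
 */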
static int z_erofs_fill_inode(struct inode *inode, struct erofs_map_blocks *map)
{
	struct erofs_inode *const vi = EROFS_I(inode);
	struct super_block *const sb = inode->i_sb;
	struct z_erofs_map_header *h;
	erofs_off_t pos;
	int err = 0;

	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
		/*
		 * paired with smp_mb() at the end of the function to ensure
		 * fields will only be observed after the bit is set.
		 */
		smp_mb();
		return 0;
	}

	if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
		return -ERESTARTSYS;

	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
		goto out_unlock;

	pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
	h = erofs_read_metabuf(&map->buf, sb, pos, erofs_inode_in_metabox(inode));
	if (IS_ERR(h)) {
		err = PTR_ERR(h);
		goto out_unlock;
	}

	/*
	 * If the highest bit of the 8-byte map header is set, the whole file
	 * is stored in the packed inode.  The remaining bits keep
	 * z_fragmentoff.
	 */
	if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
		vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
		vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
		vi->z_tailextent_headlcn = 0;
		goto done;
	}
	vi->z_advise = le16_to_cpu(h->h_advise);
	vi->z_lclusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 15);
	if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL &&
	    (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) {
		vi->z_extents = le32_to_cpu(h->h_extents_lo) |
				((u64)le16_to_cpu(h->h_extents_hi) << 32);
		goto done;
	}

	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
	if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
		vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
	else if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER)
		vi->z_idata_size = le16_to_cpu(h->h_idata_size);

	if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
	    vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
			    Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
		erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
			  vi->nid);
		err = -EFSCORRUPTED;
		goto out_unlock;
	}
	if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT &&
	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
	    !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
		erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
			  vi->nid);
		err = -EFSCORRUPTED;
		goto out_unlock;
	}

	if (vi->z_idata_size ||
	    (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
		struct erofs_map_blocks tm = {
			.buf = __EROFS_BUF_INITIALIZER
		};

		err = z_erofs_map_blocks_fo(inode, &tm,
					    EROFS_GET_BLOCKS_FINDTAIL);
		erofs_put_metabuf(&tm.buf);
		if (err < 0)
			goto out_unlock;
	}
done:
	/* paired with smp_mb() at the beginning of the function */
	smp_mb();
	set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
out_unlock:
	clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
	return err;
}

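/*
 * Validate that a mapped extent can actually be handled: a known
 * algorithm, sane compressed/decompressed lengths, and a physical
 * range within 48-bit block addresses.
 */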
static int z_erofs_map_sanity_check(struct inode *inode,
				    struct erofs_map_blocks *map)
{
	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
	u64 pend;

	if (!(map->m_flags & EROFS_MAP_MAPPED))
		return 0;
	if (unlikely(map->m_algorithmformat >= Z_EROFS_COMPRESSION_RUNTIME_MAX)) {
		erofs_err(inode->i_sb, "unknown algorithm %d @ pos %llu for nid %llu, please upgrade kernel",
			  map->m_algorithmformat, map->m_la, EROFS_I(inode)->nid);
		return -EOPNOTSUPP;
	}

	if (map->m_algorithmformat < Z_EROFS_COMPRESSION_MAX) {
		if (sbi->available_compr_algs ^ BIT(map->m_algorithmformat)) {
			erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu",
				  map->m_algorithmformat, EROFS_I(inode)->nid);
			return -EFSCORRUPTED;
		}
		if (EROFS_MAP_FULL(map->m_flags) && map->m_llen < map->m_plen) {
			erofs_err(inode->i_sb, "too much compressed data @ la %llu of nid %llu",
				  map->m_la, EROFS_I(inode)->nid);
			return -EFSCORRUPTED;
		}
	} else if (map->m_llen > map->m_plen) {
		erofs_err(inode->i_sb, "not enough plain data on disk @ la %llu of nid %llu",
			  map->m_la, EROFS_I(inode)->nid);
		return -EFSCORRUPTED;
	}
	if (unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE ||
		     map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE))
		return -EOPNOTSUPP;
	/* Filesystems beyond 48-bit physical block addresses are invalid */
	if (unlikely(check_add_overflow(map->m_pa, map->m_plen, &pend) ||
		     (pend >> sbi->blkszbits) >= BIT_ULL(48)))
		return -EFSCORRUPTED;
	return 0;
}

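/*
 * Main entry point of the compressed mapping path: dispatch to the
 * extent-record or lcluster-index mapper depending on the inode
 * layout, then validate the result.
 */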
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
			    int flags)
{
	struct erofs_inode *const vi = EROFS_I(inode);
	int err = 0;

	trace_erofs_map_blocks_enter(inode, map, flags);
	if (map->m_la >= inode->i_size) {	/* post-EOF unmapped extent */
		map->m_llen = map->m_la + 1 - inode->i_size;
		map->m_la = inode->i_size;
		map->m_flags = 0;
	} else {
		err = z_erofs_fill_inode(inode, map);
		if (!err) {
			if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL &&
			    (vi->z_advise & Z_EROFS_ADVISE_EXTENTS))
				err = z_erofs_map_blocks_ext(inode, map, flags);
			else
				err = z_erofs_map_blocks_fo(inode, map, flags);
		}
		if (!err)
			err = z_erofs_map_sanity_check(inode, map);
		if (err)
			map->m_llen = 0;
	}
	trace_erofs_map_blocks_exit(inode, map, flags, err);
	return err;
}

static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
				      loff_t length, unsigned int flags,
				      struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct erofs_map_blocks map = { .m_la = offset };

	ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP);
	erofs_put_metabuf(&map.buf);
	if (ret < 0)
		return ret;

	iomap->bdev = inode->i_sb->s_bdev;
	iomap->offset = map.m_la;
	iomap->length = map.m_llen;
	if (map.m_flags & EROFS_MAP_FRAGMENT) {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = IOMAP_NULL_ADDR;
	} else if (map.m_flags & EROFS_MAP_MAPPED) {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = map.m_pa;
	} else {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		/*
		 * There is no strict rule on how to describe extents past
		 * EOF, yet we need to handle it as below; otherwise, iomap
		 * itself will get into an endless loop on post-EOF access.
		 *
		 * Calculate the effective offset by subtracting the extent
		 * start (map.m_la) from the requested offset, and add it to
		 * the length.  (NB: offset >= map.m_la always holds.)
		 */
		if (iomap->offset >= inode->i_size)
			iomap->length = length + offset - map.m_la;
	}
	iomap->flags = 0;
	return 0;
}

const struct iomap_ops z_erofs_iomap_report_ops = {
	.iomap_begin = z_erofs_iomap_begin_report,
};