// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include "internal.h"
#include <linux/sched/mm.h>
#include <trace/events/erofs.h>

void erofs_unmap_metabuf(struct erofs_buf *buf)
{
	if (!buf->base)
		return;
	kunmap_local(buf->base);
	buf->base = NULL;
}

void erofs_put_metabuf(struct erofs_buf *buf)
{
	if (!buf->page)
		return;
	erofs_unmap_metabuf(buf);
	folio_put(page_folio(buf->page));
	buf->page = NULL;
}

void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap)
{
	pgoff_t index = (buf->off + offset) >> PAGE_SHIFT;
	struct folio *folio = NULL;

	if (buf->page) {
		folio = page_folio(buf->page);
		if (folio_file_page(folio, index) != buf->page)
			erofs_unmap_metabuf(buf);
	}
	if (!folio || !folio_contains(folio, index)) {
		erofs_put_metabuf(buf);
		folio = read_mapping_folio(buf->mapping, index, buf->file);
		if (IS_ERR(folio))
			return folio;
	}
	buf->page = folio_file_page(folio, index);
	if (!need_kmap)
		return NULL;
	if (!buf->base)
		buf->base = kmap_local_page(buf->page);
	return buf->base + (offset & ~PAGE_MASK);
}

int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb,
		       bool in_metabox)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	buf->file = NULL;
	if (in_metabox) {
		if (unlikely(!sbi->metabox_inode))
			return -EFSCORRUPTED;
		buf->mapping = sbi->metabox_inode->i_mapping;
		return 0;
	}
	buf->off = sbi->dif0.fsoff;
	if (erofs_is_fileio_mode(sbi)) {
		buf->file = sbi->dif0.file;	/* some fs like FUSE needs it */
		buf->mapping = buf->file->f_mapping;
	} else if (erofs_is_fscache_mode(sb))
		buf->mapping = sbi->dif0.fscache->inode->i_mapping;
	else
		buf->mapping = sb->s_bdev->bd_mapping;
	return 0;
}

void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
			 erofs_off_t offset, bool in_metabox)
{
	int err;

	err = erofs_init_metabuf(buf, sb, in_metabox);
	if (err)
		return ERR_PTR(err);
	return erofs_bread(buf, offset, true);
}

int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
{
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct super_block *sb = inode->i_sb;
	unsigned int unit, blksz = sb->s_blocksize;
	struct erofs_inode *vi = EROFS_I(inode);
	struct erofs_inode_chunk_index *idx;
	erofs_blk_t startblk, addrmask;
	bool tailpacking;
	erofs_off_t pos;
	u64 chunknr;
	int err = 0;

	trace_erofs_map_blocks_enter(inode, map, 0);
	map->m_deviceid = 0;
	map->m_flags = 0;
	if (map->m_la >= inode->i_size)
		goto out;

	if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
		tailpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
		if (!tailpacking && vi->startblk == EROFS_NULL_ADDR)
			goto out;
		pos = erofs_pos(sb, erofs_iblks(inode) - tailpacking);

		map->m_flags = EROFS_MAP_MAPPED;
		if (map->m_la < pos) {
			map->m_pa = erofs_pos(sb, vi->startblk) + map->m_la;
			map->m_llen = pos - map->m_la;
		} else {
			map->m_pa = erofs_iloc(inode) + vi->inode_isize +
				vi->xattr_isize + erofs_blkoff(sb, map->m_la);
			map->m_llen = inode->i_size - map->m_la;
			map->m_flags |= EROFS_MAP_META;
		}
		goto out;
	}

	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
		unit = sizeof(*idx);			/* chunk index */
	else
		unit = EROFS_BLOCK_MAP_ENTRY_SIZE;	/* block map */

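	/*
	 * Chunk-based layout: locate the on-disk entry (a full chunk index
	 * or a plain block-map slot, depending on vi->chunkformat) covering
	 * the requested logical address; the entry array follows the on-disk
	 * inode and its xattrs, aligned to the entry size.
	 */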
	chunknr = map->m_la >> vi->chunkbits;
	pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
		    vi->xattr_isize, unit) + unit * chunknr;

	idx = erofs_read_metabuf(&buf, sb, pos, erofs_inode_in_metabox(inode));
	if (IS_ERR(idx)) {
		err = PTR_ERR(idx);
		goto out;
	}
	map->m_la = chunknr << vi->chunkbits;
	map->m_llen = min_t(erofs_off_t, 1UL << vi->chunkbits,
			    round_up(inode->i_size - map->m_la, blksz));
	if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) {
		addrmask = (vi->chunkformat & EROFS_CHUNK_FORMAT_48BIT) ?
			BIT_ULL(48) - 1 : BIT_ULL(32) - 1;
		startblk = (((u64)le16_to_cpu(idx->startblk_hi) << 32) |
			    le32_to_cpu(idx->startblk_lo)) & addrmask;
		if ((startblk ^ EROFS_NULL_ADDR) & addrmask) {
			map->m_deviceid = le16_to_cpu(idx->device_id) &
				EROFS_SB(sb)->device_id_mask;
			map->m_pa = erofs_pos(sb, startblk);
			map->m_flags = EROFS_MAP_MAPPED;
		}
	} else {
		startblk = le32_to_cpu(*(__le32 *)idx);
		if (startblk != (u32)EROFS_NULL_ADDR) {
			map->m_pa = erofs_pos(sb, startblk);
			map->m_flags = EROFS_MAP_MAPPED;
		}
	}
	erofs_put_metabuf(&buf);
out:
	if (!err) {
		map->m_plen = map->m_llen;
		/* inline data should be located in the same meta block */
		if ((map->m_flags & EROFS_MAP_META) &&
		    erofs_blkoff(sb, map->m_pa) + map->m_plen > blksz) {
			erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
			DBG_BUGON(1);
			return -EFSCORRUPTED;
		}
	}
	trace_erofs_map_blocks_exit(inode, map, 0, err);
	return err;
}

static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
		struct super_block *sb, struct erofs_device_info *dif)
{
	map->m_sb = sb;
	map->m_dif = dif;
	map->m_bdev = NULL;
	if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
		map->m_bdev = file_bdev(dif->file);
}

int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
	struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
	struct erofs_device_info *dif;
	erofs_off_t startoff;
	int id;

	erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0);
	map->m_bdev = sb->s_bdev;	/* use s_bdev for the primary device */
	if (map->m_deviceid) {
		down_read(&devs->rwsem);
		dif = idr_find(&devs->tree, map->m_deviceid - 1);
		if (!dif) {
			up_read(&devs->rwsem);
			return -ENODEV;
		}
		if (devs->flatdev) {
			map->m_pa += erofs_pos(sb, dif->uniaddr);
			up_read(&devs->rwsem);
			return 0;
		}
		erofs_fill_from_devinfo(map, sb, dif);
		up_read(&devs->rwsem);
	} else if (devs->extra_devices && !devs->flatdev) {
		down_read(&devs->rwsem);
		idr_for_each_entry(&devs->tree, dif, id) {
			if (!dif->uniaddr)
				continue;

			startoff = erofs_pos(sb, dif->uniaddr);
			if (map->m_pa >= startoff &&
			    map->m_pa < startoff + erofs_pos(sb, dif->blocks)) {
				map->m_pa -= startoff;
				erofs_fill_from_devinfo(map, sb, dif);
				break;
			}
		}
		up_read(&devs->rwsem);
	}
	return 0;
}

/*
 * bit 30: I/O error occurred on this folio
 * bit 29: CPU has dirty data in D-cache (needs aliasing handling);
 * bit 0 - 28: remaining parts to complete this folio
 */
#define EROFS_ONLINEFOLIO_EIO		30
#define EROFS_ONLINEFOLIO_DIRTY		29

void erofs_onlinefolio_init(struct folio *folio)
{
	union {
		atomic_t o;
		void *v;
	} u = { .o = ATOMIC_INIT(1) };

	folio->private = u.v;	/* valid only if file-backed folio is locked */
}

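/*
 * Attach one more outstanding part to a folio previously set up with
 * erofs_onlinefolio_init(); each part must be completed by a matching
 * erofs_onlinefolio_end() call.
 */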
void erofs_onlinefolio_split(struct folio *folio)
{
	atomic_inc((atomic_t *)&folio->private);
}

void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
{
	int orig, v;

	do {
		orig = atomic_read((atomic_t *)&folio->private);
		DBG_BUGON(orig <= 0);
		v = dirty << EROFS_ONLINEFOLIO_DIRTY;
		v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO);
	} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);

	if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1))
		return;
	folio->private = 0;
	if (v & BIT(EROFS_ONLINEFOLIO_DIRTY))
		flush_dcache_folio(folio);
	folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
}

static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct super_block *sb = inode->i_sb;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;

	map.m_la = offset;
	map.m_llen = length;
	ret = erofs_map_blocks(inode, &map);
	if (ret < 0)
		return ret;

	iomap->offset = map.m_la;
	iomap->length = map.m_llen;
	iomap->flags = 0;
	iomap->private = NULL;
	iomap->addr = IOMAP_NULL_ADDR;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
		return 0;
	}

	if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(inode)) {
		mdev = (struct erofs_map_dev) {
			.m_deviceid = map.m_deviceid,
			.m_pa = map.m_pa,
		};
		ret = erofs_map_dev(sb, &mdev);
		if (ret)
			return ret;

		if (flags & IOMAP_DAX)
			iomap->dax_dev = mdev.m_dif->dax_dev;
		else
			iomap->bdev = mdev.m_bdev;
		iomap->addr = mdev.m_dif->fsoff + mdev.m_pa;
		if (flags & IOMAP_DAX)
			iomap->addr += mdev.m_dif->dax_part_off;
	}

	if (map.m_flags & EROFS_MAP_META) {
		void *ptr;
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

		iomap->type = IOMAP_INLINE;
		ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
					 erofs_inode_in_metabox(inode));
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
		iomap->inline_data = ptr;
		iomap->private = buf.base;
	} else {
		iomap->type = IOMAP_MAPPED;
	}
	return 0;
}

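/*
 * For inline (tail-packed) extents, erofs_iomap_begin() leaves the metadata
 * buffer mapped and stashes its base address in iomap->private; release it
 * here once the operation has completed.
 */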
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	void *ptr = iomap->private;

	if (ptr) {
		struct erofs_buf buf = {
			.page = kmap_to_page(ptr),
			.base = ptr,
		};

		DBG_BUGON(iomap->type != IOMAP_INLINE);
		erofs_put_metabuf(&buf);
	} else {
		DBG_BUGON(iomap->type == IOMAP_INLINE);
	}
	return written;
}

static const struct iomap_ops erofs_iomap_ops = {
	.iomap_begin = erofs_iomap_begin,
	.iomap_end = erofs_iomap_end,
};

int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
		return iomap_fiemap(inode, fieinfo, start, len,
				    &z_erofs_iomap_report_ops);
#else
		return -EOPNOTSUPP;
#endif
	}
	return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops);
}

/*
 * Since we don't have write or truncate flows, no inode locking
 * needs to be held at the moment.
 */
static int erofs_read_folio(struct file *file, struct folio *folio)
{
	trace_erofs_read_folio(folio, true);

	return iomap_read_folio(folio, &erofs_iomap_ops);
}

static void erofs_readahead(struct readahead_control *rac)
{
	trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
			      readahead_count(rac), true);

	return iomap_readahead(rac, &erofs_iomap_ops);
}

static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	return iomap_bmap(mapping, block, &erofs_iomap_ops);
}

static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	/* no need to take the (shared) inode lock since it's a read-only fs */
	if (!iov_iter_count(to))
		return 0;

#ifdef CONFIG_FS_DAX
	if (IS_DAX(inode))
		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
				    NULL, 0, NULL, 0);
	return filemap_read(iocb, to, 0);
}

/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_aops = {
	.read_folio = erofs_read_folio,
	.readahead = erofs_readahead,
	.bmap = erofs_bmap,
	.direct_IO = noop_direct_IO,
	.release_folio = iomap_release_folio,
	.invalidate_folio = iomap_invalidate_folio,
};

#ifdef CONFIG_FS_DAX
static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
		unsigned int order)
{
	return dax_iomap_fault(vmf, order, NULL, NULL, &erofs_iomap_ops);
}

static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
{
	return erofs_dax_huge_fault(vmf, 0);
}

static const struct vm_operations_struct erofs_dax_vm_ops = {
	.fault = erofs_dax_fault,
	.huge_fault = erofs_dax_huge_fault,
};

static int erofs_file_mmap_prepare(struct vm_area_desc *desc)
{
	if (!IS_DAX(file_inode(desc->file)))
		return generic_file_readonly_mmap_prepare(desc);

	if ((desc->vm_flags & VM_SHARED) && (desc->vm_flags & VM_MAYWRITE))
		return -EINVAL;

	desc->vm_ops = &erofs_dax_vm_ops;
	desc->vm_flags |= VM_HUGEPAGE;
	return 0;
}
#else
#define erofs_file_mmap_prepare generic_file_readonly_mmap_prepare
#endif

static loff_t erofs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	const struct iomap_ops *ops = &erofs_iomap_ops;

	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout))
#ifdef CONFIG_EROFS_FS_ZIP
		ops = &z_erofs_iomap_report_ops;
#else
		return generic_file_llseek(file, offset, whence);
#endif

	if (whence == SEEK_HOLE)
		offset = iomap_seek_hole(inode, offset, ops);
	else if (whence == SEEK_DATA)
		offset = iomap_seek_data(inode, offset, ops);
	else
		return generic_file_llseek(file, offset, whence);

	if (offset < 0)
		return offset;
	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}

const struct file_operations erofs_file_fops = {
	.llseek = erofs_file_llseek,
	.read_iter = erofs_file_read_iter,
	.mmap_prepare = erofs_file_mmap_prepare,
	.get_unmapped_area = thp_get_unmapped_area,
	.splice_read = filemap_splice_read,
};