1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2017-2018 HUAWEI, Inc. 4 * https://www.huawei.com/ 5 * Copyright (C) 2021, Alibaba Cloud 6 */ 7 #include "internal.h" 8 #include <linux/sched/mm.h> 9 #include <trace/events/erofs.h> 10 11 void erofs_unmap_metabuf(struct erofs_buf *buf) 12 { 13 if (buf->kmap_type == EROFS_KMAP) 14 kunmap_local(buf->base); 15 buf->base = NULL; 16 buf->kmap_type = EROFS_NO_KMAP; 17 } 18 19 void erofs_put_metabuf(struct erofs_buf *buf) 20 { 21 if (!buf->page) 22 return; 23 erofs_unmap_metabuf(buf); 24 folio_put(page_folio(buf->page)); 25 buf->page = NULL; 26 } 27 28 void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, 29 enum erofs_kmap_type type) 30 { 31 pgoff_t index = offset >> PAGE_SHIFT; 32 struct folio *folio = NULL; 33 34 if (buf->page) { 35 folio = page_folio(buf->page); 36 if (folio_file_page(folio, index) != buf->page) 37 erofs_unmap_metabuf(buf); 38 } 39 if (!folio || !folio_contains(folio, index)) { 40 erofs_put_metabuf(buf); 41 folio = read_mapping_folio(buf->mapping, index, NULL); 42 if (IS_ERR(folio)) 43 return folio; 44 } 45 buf->page = folio_file_page(folio, index); 46 47 if (buf->kmap_type == EROFS_NO_KMAP) { 48 if (type == EROFS_KMAP) 49 buf->base = kmap_local_page(buf->page); 50 buf->kmap_type = type; 51 } else if (buf->kmap_type != type) { 52 DBG_BUGON(1); 53 return ERR_PTR(-EFAULT); 54 } 55 if (type == EROFS_NO_KMAP) 56 return NULL; 57 return buf->base + (offset & ~PAGE_MASK); 58 } 59 60 void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb) 61 { 62 struct erofs_sb_info *sbi = EROFS_SB(sb); 63 64 if (erofs_is_fileio_mode(sbi)) 65 buf->mapping = file_inode(sbi->fdev)->i_mapping; 66 else if (erofs_is_fscache_mode(sb)) 67 buf->mapping = sbi->s_fscache->inode->i_mapping; 68 else 69 buf->mapping = sb->s_bdev->bd_mapping; 70 } 71 72 void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, 73 erofs_off_t offset, enum erofs_kmap_type type) 74 { 75 erofs_init_metabuf(buf, sb); 76 return erofs_bread(buf, offset, type); 77 } 78 79 static int erofs_map_blocks_flatmode(struct inode *inode, 80 struct erofs_map_blocks *map) 81 { 82 struct erofs_inode *vi = EROFS_I(inode); 83 struct super_block *sb = inode->i_sb; 84 bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); 85 erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking; 86 87 map->m_flags = EROFS_MAP_MAPPED; /* no hole in flat inodes */ 88 if (map->m_la < erofs_pos(sb, lastblk)) { 89 map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la; 90 map->m_plen = erofs_pos(sb, lastblk) - map->m_la; 91 } else { 92 DBG_BUGON(!tailendpacking); 93 map->m_pa = erofs_iloc(inode) + vi->inode_isize + 94 vi->xattr_isize + erofs_blkoff(sb, map->m_la); 95 map->m_plen = inode->i_size - map->m_la; 96 97 /* inline data should be located in the same meta block */ 98 if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { 99 erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid); 100 DBG_BUGON(1); 101 return -EFSCORRUPTED; 102 } 103 map->m_flags |= EROFS_MAP_META; 104 } 105 return 0; 106 } 107 108 int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) 109 { 110 struct super_block *sb = inode->i_sb; 111 struct erofs_inode *vi = EROFS_I(inode); 112 struct erofs_inode_chunk_index *idx; 113 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 114 u64 chunknr; 115 unsigned int unit; 116 erofs_off_t pos; 117 void *kaddr; 118 int err = 0; 119 120 trace_erofs_map_blocks_enter(inode, map, 0); 121 map->m_deviceid = 0; 122 if (map->m_la >= inode->i_size) { 123 /* leave out-of-bound access unmapped */ 124 map->m_flags = 0; 125 map->m_plen = map->m_llen; 126 goto out; 127 } 128 129 if (vi->datalayout != EROFS_INODE_CHUNK_BASED) { 130 err = erofs_map_blocks_flatmode(inode, map); 131 goto out; 132 } 133 134 if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) 135 unit = sizeof(*idx); /* chunk index */ 136 else 137 unit = EROFS_BLOCK_MAP_ENTRY_SIZE; /* block map */ 138 139 chunknr = map->m_la >> vi->chunkbits; 140 pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + 141 vi->xattr_isize, unit) + unit * chunknr; 142 143 kaddr = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP); 144 if (IS_ERR(kaddr)) { 145 err = PTR_ERR(kaddr); 146 goto out; 147 } 148 map->m_la = chunknr << vi->chunkbits; 149 map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits, 150 round_up(inode->i_size - map->m_la, sb->s_blocksize)); 151 152 /* handle block map */ 153 if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) { 154 __le32 *blkaddr = kaddr; 155 156 if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) { 157 map->m_flags = 0; 158 } else { 159 map->m_pa = erofs_pos(sb, le32_to_cpu(*blkaddr)); 160 map->m_flags = EROFS_MAP_MAPPED; 161 } 162 goto out_unlock; 163 } 164 /* parse chunk indexes */ 165 idx = kaddr; 166 switch (le32_to_cpu(idx->blkaddr)) { 167 case EROFS_NULL_ADDR: 168 map->m_flags = 0; 169 break; 170 default: 171 map->m_deviceid = le16_to_cpu(idx->device_id) & 172 EROFS_SB(sb)->device_id_mask; 173 map->m_pa = erofs_pos(sb, le32_to_cpu(idx->blkaddr)); 174 map->m_flags = EROFS_MAP_MAPPED; 175 break; 176 } 177 out_unlock: 178 erofs_put_metabuf(&buf); 179 out: 180 if (!err) 181 map->m_llen = map->m_plen; 182 trace_erofs_map_blocks_exit(inode, map, 0, err); 183 return err; 184 } 185 186 static void erofs_fill_from_devinfo(struct erofs_map_dev *map, 187 struct erofs_device_info *dif) 188 { 189 map->m_bdev = NULL; 190 map->m_fp = NULL; 191 if (dif->file) { 192 if (S_ISBLK(file_inode(dif->file)->i_mode)) 193 map->m_bdev = file_bdev(dif->file); 194 else 195 map->m_fp = dif->file; 196 } 197 map->m_daxdev = dif->dax_dev; 198 map->m_dax_part_off = dif->dax_part_off; 199 map->m_fscache = dif->fscache; 200 } 201 202 int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) 203 { 204 struct erofs_dev_context *devs = EROFS_SB(sb)->devs; 205 struct erofs_device_info *dif; 206 erofs_off_t startoff, length; 207 int id; 208 209 map->m_bdev = sb->s_bdev; 210 map->m_daxdev = EROFS_SB(sb)->dax_dev; 211 map->m_dax_part_off = EROFS_SB(sb)->dax_part_off; 212 map->m_fscache = EROFS_SB(sb)->s_fscache; 213 map->m_fp = EROFS_SB(sb)->fdev; 214 215 if (map->m_deviceid) { 216 down_read(&devs->rwsem); 217 dif = idr_find(&devs->tree, map->m_deviceid - 1); 218 if (!dif) { 219 up_read(&devs->rwsem); 220 return -ENODEV; 221 } 222 if (devs->flatdev) { 223 map->m_pa += erofs_pos(sb, dif->mapped_blkaddr); 224 up_read(&devs->rwsem); 225 return 0; 226 } 227 erofs_fill_from_devinfo(map, dif); 228 up_read(&devs->rwsem); 229 } else if (devs->extra_devices && !devs->flatdev) { 230 down_read(&devs->rwsem); 231 idr_for_each_entry(&devs->tree, dif, id) { 232 if (!dif->mapped_blkaddr) 233 continue; 234 235 startoff = erofs_pos(sb, dif->mapped_blkaddr); 236 length = erofs_pos(sb, dif->blocks); 237 if (map->m_pa >= startoff && 238 map->m_pa < startoff + length) { 239 map->m_pa -= startoff; 240 erofs_fill_from_devinfo(map, dif); 241 break; 242 } 243 } 244 up_read(&devs->rwsem); 245 } 246 return 0; 247 } 248 249 /* 250 * bit 30: I/O error occurred on this folio 251 * bit 0 - 29: remaining parts to complete this folio 252 */ 253 #define EROFS_ONLINEFOLIO_EIO (1 << 30) 254 255 void erofs_onlinefolio_init(struct folio *folio) 256 { 257 union { 258 atomic_t o; 259 void *v; 260 } u = { .o = ATOMIC_INIT(1) }; 261 262 folio->private = u.v; /* valid only if file-backed folio is locked */ 263 } 264 265 void erofs_onlinefolio_split(struct folio *folio) 266 { 267 atomic_inc((atomic_t *)&folio->private); 268 } 269 270 void erofs_onlinefolio_end(struct folio *folio, int err) 271 { 272 int orig, v; 273 274 do { 275 orig = atomic_read((atomic_t *)&folio->private); 276 v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0); 277 } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig); 278 279 if (v & ~EROFS_ONLINEFOLIO_EIO) 280 return; 281 folio->private = 0; 282 folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO)); 283 } 284 285 static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 286 unsigned int flags, struct iomap *iomap, struct iomap *srcmap) 287 { 288 int ret; 289 struct super_block *sb = inode->i_sb; 290 struct erofs_map_blocks map; 291 struct erofs_map_dev mdev; 292 293 map.m_la = offset; 294 map.m_llen = length; 295 296 ret = erofs_map_blocks(inode, &map); 297 if (ret < 0) 298 return ret; 299 300 mdev = (struct erofs_map_dev) { 301 .m_deviceid = map.m_deviceid, 302 .m_pa = map.m_pa, 303 }; 304 ret = erofs_map_dev(sb, &mdev); 305 if (ret) 306 return ret; 307 308 iomap->offset = map.m_la; 309 if (flags & IOMAP_DAX) 310 iomap->dax_dev = mdev.m_daxdev; 311 else 312 iomap->bdev = mdev.m_bdev; 313 iomap->length = map.m_llen; 314 iomap->flags = 0; 315 iomap->private = NULL; 316 317 if (!(map.m_flags & EROFS_MAP_MAPPED)) { 318 iomap->type = IOMAP_HOLE; 319 iomap->addr = IOMAP_NULL_ADDR; 320 if (!iomap->length) 321 iomap->length = length; 322 return 0; 323 } 324 325 if (map.m_flags & EROFS_MAP_META) { 326 void *ptr; 327 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 328 329 iomap->type = IOMAP_INLINE; 330 ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, EROFS_KMAP); 331 if (IS_ERR(ptr)) 332 return PTR_ERR(ptr); 333 iomap->inline_data = ptr; 334 iomap->private = buf.base; 335 } else { 336 iomap->type = IOMAP_MAPPED; 337 iomap->addr = mdev.m_pa; 338 if (flags & IOMAP_DAX) 339 iomap->addr += mdev.m_dax_part_off; 340 } 341 return 0; 342 } 343 344 static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length, 345 ssize_t written, unsigned int flags, struct iomap *iomap) 346 { 347 void *ptr = iomap->private; 348 349 if (ptr) { 350 struct erofs_buf buf = { 351 .page = kmap_to_page(ptr), 352 .base = ptr, 353 .kmap_type = EROFS_KMAP, 354 }; 355 356 DBG_BUGON(iomap->type != IOMAP_INLINE); 357 erofs_put_metabuf(&buf); 358 } else { 359 DBG_BUGON(iomap->type == IOMAP_INLINE); 360 } 361 return written; 362 } 363 364 static const struct iomap_ops erofs_iomap_ops = { 365 .iomap_begin = erofs_iomap_begin, 366 .iomap_end = erofs_iomap_end, 367 }; 368 369 int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 370 u64 start, u64 len) 371 { 372 if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) { 373 #ifdef CONFIG_EROFS_FS_ZIP 374 return iomap_fiemap(inode, fieinfo, start, len, 375 &z_erofs_iomap_report_ops); 376 #else 377 return -EOPNOTSUPP; 378 #endif 379 } 380 return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops); 381 } 382 383 /* 384 * since we dont have write or truncate flows, so no inode 385 * locking needs to be held at the moment. 386 */ 387 static int erofs_read_folio(struct file *file, struct folio *folio) 388 { 389 return iomap_read_folio(folio, &erofs_iomap_ops); 390 } 391 392 static void erofs_readahead(struct readahead_control *rac) 393 { 394 return iomap_readahead(rac, &erofs_iomap_ops); 395 } 396 397 static sector_t erofs_bmap(struct address_space *mapping, sector_t block) 398 { 399 return iomap_bmap(mapping, block, &erofs_iomap_ops); 400 } 401 402 static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 403 { 404 struct inode *inode = file_inode(iocb->ki_filp); 405 406 /* no need taking (shared) inode lock since it's a ro filesystem */ 407 if (!iov_iter_count(to)) 408 return 0; 409 410 #ifdef CONFIG_FS_DAX 411 if (IS_DAX(inode)) 412 return dax_iomap_rw(iocb, to, &erofs_iomap_ops); 413 #endif 414 if (iocb->ki_flags & IOCB_DIRECT) { 415 struct block_device *bdev = inode->i_sb->s_bdev; 416 unsigned int blksize_mask; 417 418 if (bdev) 419 blksize_mask = bdev_logical_block_size(bdev) - 1; 420 else 421 blksize_mask = i_blocksize(inode) - 1; 422 423 if ((iocb->ki_pos | iov_iter_count(to) | 424 iov_iter_alignment(to)) & blksize_mask) 425 return -EINVAL; 426 427 return iomap_dio_rw(iocb, to, &erofs_iomap_ops, 428 NULL, 0, NULL, 0); 429 } 430 return filemap_read(iocb, to, 0); 431 } 432 433 /* for uncompressed (aligned) files and raw access for other files */ 434 const struct address_space_operations erofs_aops = { 435 .read_folio = erofs_read_folio, 436 .readahead = erofs_readahead, 437 .bmap = erofs_bmap, 438 .direct_IO = noop_direct_IO, 439 .release_folio = iomap_release_folio, 440 .invalidate_folio = iomap_invalidate_folio, 441 }; 442 443 #ifdef CONFIG_FS_DAX 444 static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf, 445 unsigned int order) 446 { 447 return dax_iomap_fault(vmf, order, NULL, NULL, &erofs_iomap_ops); 448 } 449 450 static vm_fault_t erofs_dax_fault(struct vm_fault *vmf) 451 { 452 return erofs_dax_huge_fault(vmf, 0); 453 } 454 455 static const struct vm_operations_struct erofs_dax_vm_ops = { 456 .fault = erofs_dax_fault, 457 .huge_fault = erofs_dax_huge_fault, 458 }; 459 460 static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma) 461 { 462 if (!IS_DAX(file_inode(file))) 463 return generic_file_readonly_mmap(file, vma); 464 465 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) 466 return -EINVAL; 467 468 vma->vm_ops = &erofs_dax_vm_ops; 469 vm_flags_set(vma, VM_HUGEPAGE); 470 return 0; 471 } 472 #else 473 #define erofs_file_mmap generic_file_readonly_mmap 474 #endif 475 476 const struct file_operations erofs_file_fops = { 477 .llseek = generic_file_llseek, 478 .read_iter = erofs_file_read_iter, 479 .mmap = erofs_file_mmap, 480 .get_unmapped_area = thp_get_unmapped_area, 481 .splice_read = filemap_splice_read, 482 }; 483