// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include "internal.h"
#include <linux/prefetch.h>
#include <linux/sched/mm.h>
#include <linux/dax.h>
#include <trace/events/erofs.h>

void erofs_unmap_metabuf(struct erofs_buf *buf)
{
        if (buf->kmap_type == EROFS_KMAP)
                kunmap_local(buf->base);
        buf->base = NULL;
        buf->kmap_type = EROFS_NO_KMAP;
}

void erofs_put_metabuf(struct erofs_buf *buf)
{
        if (!buf->page)
                return;
        erofs_unmap_metabuf(buf);
        put_page(buf->page);
        buf->page = NULL;
}

void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
                  erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
        struct address_space *const mapping = inode->i_mapping;
        erofs_off_t offset = blknr_to_addr(blkaddr);
        pgoff_t index = offset >> PAGE_SHIFT;
        struct page *page = buf->page;
        struct folio *folio;
        unsigned int nofs_flag;

        if (!page || page->index != index) {
                erofs_put_metabuf(buf);

                nofs_flag = memalloc_nofs_save();
                folio = read_cache_folio(mapping, index, NULL, NULL);
                memalloc_nofs_restore(nofs_flag);
                if (IS_ERR(folio))
                        return folio;

                /* should already be PageUptodate, no need to lock page */
                page = folio_file_page(folio, index);
                buf->page = page;
        }
        if (buf->kmap_type == EROFS_NO_KMAP) {
                if (type == EROFS_KMAP)
                        buf->base = kmap_local_page(page);
                buf->kmap_type = type;
        } else if (buf->kmap_type != type) {
                DBG_BUGON(1);
                return ERR_PTR(-EFAULT);
        }
        if (type == EROFS_NO_KMAP)
                return NULL;
        return buf->base + (offset & ~PAGE_MASK);
}

void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
                         erofs_blk_t blkaddr, enum erofs_kmap_type type)
{
        if (erofs_is_fscache_mode(sb))
                return erofs_bread(buf, EROFS_SB(sb)->s_fscache->inode,
                                   blkaddr, type);

        return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type);
}

static int erofs_map_blocks_flatmode(struct inode *inode,
                                     struct erofs_map_blocks *map)
{
        erofs_blk_t nblocks, lastblk;
        u64 offset = map->m_la;
        struct erofs_inode *vi = EROFS_I(inode);
        bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);

        nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
        lastblk = nblocks - tailendpacking;

        /* there is no hole in flatmode */
        map->m_flags = EROFS_MAP_MAPPED;
        if (offset < blknr_to_addr(lastblk)) {
                map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
                map->m_plen = blknr_to_addr(lastblk) - offset;
        } else if (tailendpacking) {
                map->m_pa = erofs_iloc(inode) + vi->inode_isize +
                        vi->xattr_isize + erofs_blkoff(offset);
                map->m_plen = inode->i_size - offset;

                /* inline data should be located in the same meta block */
                if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
                        erofs_err(inode->i_sb,
                                  "inline data cross block boundary @ nid %llu",
                                  vi->nid);
                        DBG_BUGON(1);
                        return -EFSCORRUPTED;
                }
                map->m_flags |= EROFS_MAP_META;
        } else {
                erofs_err(inode->i_sb,
                          "internal error @ nid: %llu (size %llu), m_la 0x%llx",
                          vi->nid, inode->i_size, map->m_la);
                DBG_BUGON(1);
                return -EIO;
        }
        return 0;
}
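
/*
 * A worked example of the flat-mode math above (all numbers are
 * illustrative, not taken from a real image): with 4KiB blocks, an
 * EROFS_INODE_FLAT_INLINE inode of i_size 9000 bytes has nblocks = 3
 * and lastblk = 2.  Accesses below offset 8192 map linearly from
 * vi->raw_blkaddr; accesses in [8192, 9000) hit the inline tail
 * stored right after the on-disk inode and its xattrs, which must
 * fit entirely within one meta block.
 */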

int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
{
        struct super_block *sb = inode->i_sb;
        struct erofs_inode *vi = EROFS_I(inode);
        struct erofs_inode_chunk_index *idx;
        struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
        u64 chunknr;
        unsigned int unit;
        erofs_off_t pos;
        void *kaddr;
        int err = 0;

        trace_erofs_map_blocks_enter(inode, map, 0);
        map->m_deviceid = 0;
        if (map->m_la >= inode->i_size) {
                /* leave out-of-bound access unmapped */
                map->m_flags = 0;
                map->m_plen = 0;
                goto out;
        }

        if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
                err = erofs_map_blocks_flatmode(inode, map);
                goto out;
        }

        if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)
                unit = sizeof(*idx);                    /* chunk index */
        else
                unit = EROFS_BLOCK_MAP_ENTRY_SIZE;      /* block map */

        chunknr = map->m_la >> vi->chunkbits;
        pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
                    vi->xattr_isize, unit) + unit * chunknr;

        kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
        if (IS_ERR(kaddr)) {
                err = PTR_ERR(kaddr);
                goto out;
        }
        map->m_la = chunknr << vi->chunkbits;
        map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
                            roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));

        /* handle block map */
        if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
                __le32 *blkaddr = kaddr + erofs_blkoff(pos);

                if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
                        map->m_flags = 0;
                } else {
                        map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
                        map->m_flags = EROFS_MAP_MAPPED;
                }
                goto out_unlock;
        }
        /* parse chunk indexes */
        idx = kaddr + erofs_blkoff(pos);
        switch (le32_to_cpu(idx->blkaddr)) {
        case EROFS_NULL_ADDR:
                map->m_flags = 0;
                break;
        default:
                map->m_deviceid = le16_to_cpu(idx->device_id) &
                        EROFS_SB(sb)->device_id_mask;
                map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
                map->m_flags = EROFS_MAP_MAPPED;
                break;
        }
out_unlock:
        erofs_put_metabuf(&buf);
out:
        if (!err)
                map->m_llen = map->m_plen;
        trace_erofs_map_blocks_exit(inode, map, 0, err);
        return err;
}

int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
        struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
        struct erofs_device_info *dif;
        int id;

        /* primary device by default */
        map->m_bdev = sb->s_bdev;
        map->m_daxdev = EROFS_SB(sb)->dax_dev;
        map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
        map->m_fscache = EROFS_SB(sb)->s_fscache;

        if (map->m_deviceid) {
                down_read(&devs->rwsem);
                dif = idr_find(&devs->tree, map->m_deviceid - 1);
                if (!dif) {
                        up_read(&devs->rwsem);
                        return -ENODEV;
                }
                map->m_bdev = dif->bdev;
                map->m_daxdev = dif->dax_dev;
                map->m_dax_part_off = dif->dax_part_off;
                map->m_fscache = dif->fscache;
                up_read(&devs->rwsem);
        } else if (devs->extra_devices) {
                down_read(&devs->rwsem);
                idr_for_each_entry(&devs->tree, dif, id) {
                        erofs_off_t startoff, length;

                        if (!dif->mapped_blkaddr)
                                continue;
                        startoff = blknr_to_addr(dif->mapped_blkaddr);
                        length = blknr_to_addr(dif->blocks);

                        if (map->m_pa >= startoff &&
                            map->m_pa < startoff + length) {
                                map->m_pa -= startoff;
                                map->m_bdev = dif->bdev;
                                map->m_daxdev = dif->dax_dev;
                                map->m_dax_part_off = dif->dax_part_off;
                                map->m_fscache = dif->fscache;
                                break;
                        }
                }
                up_read(&devs->rwsem);
        }
        return 0;
}
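
/*
 * Example of the multi-device rebase above (hypothetical layout):
 * with 4KiB blocks, an extra device registered with
 * mapped_blkaddr = 1024 and blocks = 256 covers the flattened range
 * [4MiB, 5MiB).  A lookup with map->m_pa = 4MiB + 512 then selects
 * that device and rebases m_pa to 512, an offset relative to the
 * start of the extra device.
 */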

static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
                unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
        int ret;
        struct erofs_map_blocks map;
        struct erofs_map_dev mdev;

        map.m_la = offset;
        map.m_llen = length;

        ret = erofs_map_blocks(inode, &map);
        if (ret < 0)
                return ret;

        mdev = (struct erofs_map_dev) {
                .m_deviceid = map.m_deviceid,
                .m_pa = map.m_pa,
        };
        ret = erofs_map_dev(inode->i_sb, &mdev);
        if (ret)
                return ret;

        iomap->offset = map.m_la;
        if (flags & IOMAP_DAX)
                iomap->dax_dev = mdev.m_daxdev;
        else
                iomap->bdev = mdev.m_bdev;
        iomap->length = map.m_llen;
        iomap->flags = 0;
        iomap->private = NULL;

        if (!(map.m_flags & EROFS_MAP_MAPPED)) {
                iomap->type = IOMAP_HOLE;
                iomap->addr = IOMAP_NULL_ADDR;
                if (!iomap->length)
                        iomap->length = length;
                return 0;
        }

        if (map.m_flags & EROFS_MAP_META) {
                void *ptr;
                struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

                iomap->type = IOMAP_INLINE;
                ptr = erofs_read_metabuf(&buf, inode->i_sb,
                                         erofs_blknr(mdev.m_pa), EROFS_KMAP);
                if (IS_ERR(ptr))
                        return PTR_ERR(ptr);
                iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa);
                iomap->private = buf.base;
        } else {
                iomap->type = IOMAP_MAPPED;
                iomap->addr = mdev.m_pa;
                if (flags & IOMAP_DAX)
                        iomap->addr += mdev.m_dax_part_off;
        }
        return 0;
}

static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
                ssize_t written, unsigned int flags, struct iomap *iomap)
{
        void *ptr = iomap->private;

        if (ptr) {
                struct erofs_buf buf = {
                        .page = kmap_to_page(ptr),
                        .base = ptr,
                        .kmap_type = EROFS_KMAP,
                };

                DBG_BUGON(iomap->type != IOMAP_INLINE);
                erofs_put_metabuf(&buf);
        } else {
                DBG_BUGON(iomap->type == IOMAP_INLINE);
        }
        return written;
}

static const struct iomap_ops erofs_iomap_ops = {
        .iomap_begin = erofs_iomap_begin,
        .iomap_end = erofs_iomap_end,
};

int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 u64 start, u64 len)
{
        if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
                return iomap_fiemap(inode, fieinfo, start, len,
                                    &z_erofs_iomap_report_ops);
#else
                return -EOPNOTSUPP;
#endif
        }
        return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops);
}
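
/*
 * A minimal userspace sketch that ends up in erofs_fiemap() via the
 * FS_IOC_FIEMAP ioctl (the path and extent count are hypothetical);
 * uncompressed inodes are walked with erofs_iomap_ops as above:
 *
 *      struct fiemap *fm = calloc(1, sizeof(*fm) +
 *                                 sizeof(struct fiemap_extent));
 *      fm->fm_length = FIEMAP_MAX_OFFSET;
 *      fm->fm_extent_count = 1;
 *      ioctl(open("/mnt/erofs/file", O_RDONLY), FS_IOC_FIEMAP, fm);
 */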

/*
 * Since there are no write or truncate flows, no inode locking
 * needs to be held at the moment.
 */
static int erofs_read_folio(struct file *file, struct folio *folio)
{
        return iomap_read_folio(folio, &erofs_iomap_ops);
}

static void erofs_readahead(struct readahead_control *rac)
{
        return iomap_readahead(rac, &erofs_iomap_ops);
}

static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
        return iomap_bmap(mapping, block, &erofs_iomap_ops);
}

static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
        struct inode *inode = file_inode(iocb->ki_filp);

        /* no need to take the (shared) inode lock since it's a ro filesystem */
        if (!iov_iter_count(to))
                return 0;

#ifdef CONFIG_FS_DAX
        if (IS_DAX(inode))
                return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
        if (iocb->ki_flags & IOCB_DIRECT) {
                struct block_device *bdev = inode->i_sb->s_bdev;
                unsigned int blksize_mask;

                if (bdev)
                        blksize_mask = bdev_logical_block_size(bdev) - 1;
                else
                        blksize_mask = (1 << inode->i_blkbits) - 1;

                if ((iocb->ki_pos | iov_iter_count(to) |
                     iov_iter_alignment(to)) & blksize_mask)
                        return -EINVAL;

                return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
                                    NULL, 0, NULL, 0);
        }
        return filemap_read(iocb, to, 0);
}

/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_raw_access_aops = {
        .read_folio = erofs_read_folio,
        .readahead = erofs_readahead,
        .bmap = erofs_bmap,
        .direct_IO = noop_direct_IO,
        .release_folio = iomap_release_folio,
        .invalidate_folio = iomap_invalidate_folio,
};

#ifdef CONFIG_FS_DAX
static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
                enum page_entry_size pe_size)
{
        return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops);
}

static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
{
        return erofs_dax_huge_fault(vmf, PE_SIZE_PTE);
}

static const struct vm_operations_struct erofs_dax_vm_ops = {
        .fault          = erofs_dax_fault,
        .huge_fault     = erofs_dax_huge_fault,
};

static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
        if (!IS_DAX(file_inode(file)))
                return generic_file_readonly_mmap(file, vma);

        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
                return -EINVAL;

        vma->vm_ops = &erofs_dax_vm_ops;
        vm_flags_set(vma, VM_HUGEPAGE);
        return 0;
}
#else
#define erofs_file_mmap generic_file_readonly_mmap
#endif

const struct file_operations erofs_file_fops = {
        .llseek         = generic_file_llseek,
        .read_iter      = erofs_file_read_iter,
        .mmap           = erofs_file_mmap,
        .splice_read    = generic_file_splice_read,
};
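
/*
 * Usage note for the O_DIRECT path in erofs_file_read_iter() above:
 * file position, request length and user buffer must all be aligned
 * to the logical block size, or the read fails with -EINVAL.  A
 * hedged userspace sketch (the 512-byte alignment is an assumption
 * about the backing device):
 *
 *      void *buf;
 *      int fd = open("/mnt/erofs/file", O_RDONLY | O_DIRECT);
 *
 *      posix_memalign(&buf, 512, 4096);
 *      pread(fd, buf, 4096, 0);
 */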