1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * (C) 2001 Clemson University and The University of Chicago 4 * Copyright 2018 Omnibond Systems, L.L.C. 5 * 6 * See COPYING in top-level directory. 7 */ 8 9 /* 10 * Linux VFS inode operations. 11 */ 12 13 #include <linux/blkdev.h> 14 #include <linux/fileattr.h> 15 #include "protocol.h" 16 #include "orangefs-kernel.h" 17 #include "orangefs-bufmap.h" 18 19 static int orangefs_writepage_locked(struct page *page, 20 struct writeback_control *wbc) 21 { 22 struct inode *inode = page->mapping->host; 23 struct orangefs_write_range *wr = NULL; 24 struct iov_iter iter; 25 struct bio_vec bv; 26 size_t len, wlen; 27 ssize_t ret; 28 loff_t off; 29 30 set_page_writeback(page); 31 32 len = i_size_read(inode); 33 if (PagePrivate(page)) { 34 wr = (struct orangefs_write_range *)page_private(page); 35 WARN_ON(wr->pos >= len); 36 off = wr->pos; 37 if (off + wr->len > len) 38 wlen = len - off; 39 else 40 wlen = wr->len; 41 } else { 42 WARN_ON(1); 43 off = page_offset(page); 44 if (off + PAGE_SIZE > len) 45 wlen = len - off; 46 else 47 wlen = PAGE_SIZE; 48 } 49 /* Should've been handled in orangefs_invalidate_folio. */ 50 WARN_ON(off == len || off + wlen > len); 51 52 WARN_ON(wlen == 0); 53 bvec_set_page(&bv, page, wlen, off % PAGE_SIZE); 54 iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, wlen); 55 56 ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen, 57 len, wr, NULL, NULL); 58 if (ret < 0) { 59 mapping_set_error(page->mapping, ret); 60 } else { 61 ret = 0; 62 } 63 kfree(detach_page_private(page)); 64 return ret; 65 } 66 67 static int orangefs_writepage(struct page *page, struct writeback_control *wbc) 68 { 69 int ret; 70 ret = orangefs_writepage_locked(page, wbc); 71 unlock_page(page); 72 end_page_writeback(page); 73 return ret; 74 } 75 76 struct orangefs_writepages { 77 loff_t off; 78 size_t len; 79 kuid_t uid; 80 kgid_t gid; 81 int maxpages; 82 int npages; 83 struct page **pages; 84 struct bio_vec *bv; 85 }; 86 87 static int orangefs_writepages_work(struct orangefs_writepages *ow, 88 struct writeback_control *wbc) 89 { 90 struct inode *inode = ow->pages[0]->mapping->host; 91 struct orangefs_write_range *wrp, wr; 92 struct iov_iter iter; 93 ssize_t ret; 94 size_t len; 95 loff_t off; 96 int i; 97 98 len = i_size_read(inode); 99 100 for (i = 0; i < ow->npages; i++) { 101 set_page_writeback(ow->pages[i]); 102 bvec_set_page(&ow->bv[i], ow->pages[i], 103 min(page_offset(ow->pages[i]) + PAGE_SIZE, 104 ow->off + ow->len) - 105 max(ow->off, page_offset(ow->pages[i])), 106 i == 0 ? ow->off - page_offset(ow->pages[i]) : 0); 107 } 108 iov_iter_bvec(&iter, ITER_SOURCE, ow->bv, ow->npages, ow->len); 109 110 WARN_ON(ow->off >= len); 111 if (ow->off + ow->len > len) 112 ow->len = len - ow->off; 113 114 off = ow->off; 115 wr.uid = ow->uid; 116 wr.gid = ow->gid; 117 ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, ow->len, 118 0, &wr, NULL, NULL); 119 if (ret < 0) { 120 for (i = 0; i < ow->npages; i++) { 121 mapping_set_error(ow->pages[i]->mapping, ret); 122 if (PagePrivate(ow->pages[i])) { 123 wrp = (struct orangefs_write_range *) 124 page_private(ow->pages[i]); 125 ClearPagePrivate(ow->pages[i]); 126 put_page(ow->pages[i]); 127 kfree(wrp); 128 } 129 end_page_writeback(ow->pages[i]); 130 unlock_page(ow->pages[i]); 131 } 132 } else { 133 ret = 0; 134 for (i = 0; i < ow->npages; i++) { 135 if (PagePrivate(ow->pages[i])) { 136 wrp = (struct orangefs_write_range *) 137 page_private(ow->pages[i]); 138 ClearPagePrivate(ow->pages[i]); 139 put_page(ow->pages[i]); 140 kfree(wrp); 141 } 142 end_page_writeback(ow->pages[i]); 143 unlock_page(ow->pages[i]); 144 } 145 } 146 return ret; 147 } 148 149 static int orangefs_writepages_callback(struct folio *folio, 150 struct writeback_control *wbc, void *data) 151 { 152 struct orangefs_writepages *ow = data; 153 struct orangefs_write_range *wr = folio->private; 154 int ret; 155 156 if (!wr) { 157 folio_unlock(folio); 158 /* It's not private so there's nothing to write, right? */ 159 printk("writepages_callback not private!\n"); 160 BUG(); 161 return 0; 162 } 163 164 ret = -1; 165 if (ow->npages == 0) { 166 ow->off = wr->pos; 167 ow->len = wr->len; 168 ow->uid = wr->uid; 169 ow->gid = wr->gid; 170 ow->pages[ow->npages++] = &folio->page; 171 ret = 0; 172 goto done; 173 } 174 if (!uid_eq(ow->uid, wr->uid) || !gid_eq(ow->gid, wr->gid)) { 175 orangefs_writepages_work(ow, wbc); 176 ow->npages = 0; 177 ret = -1; 178 goto done; 179 } 180 if (ow->off + ow->len == wr->pos) { 181 ow->len += wr->len; 182 ow->pages[ow->npages++] = &folio->page; 183 ret = 0; 184 goto done; 185 } 186 done: 187 if (ret == -1) { 188 if (ow->npages) { 189 orangefs_writepages_work(ow, wbc); 190 ow->npages = 0; 191 } 192 ret = orangefs_writepage_locked(&folio->page, wbc); 193 mapping_set_error(folio->mapping, ret); 194 folio_unlock(folio); 195 folio_end_writeback(folio); 196 } else { 197 if (ow->npages == ow->maxpages) { 198 orangefs_writepages_work(ow, wbc); 199 ow->npages = 0; 200 } 201 } 202 return ret; 203 } 204 205 static int orangefs_writepages(struct address_space *mapping, 206 struct writeback_control *wbc) 207 { 208 struct orangefs_writepages *ow; 209 struct blk_plug plug; 210 int ret; 211 ow = kzalloc(sizeof(struct orangefs_writepages), GFP_KERNEL); 212 if (!ow) 213 return -ENOMEM; 214 ow->maxpages = orangefs_bufmap_size_query()/PAGE_SIZE; 215 ow->pages = kcalloc(ow->maxpages, sizeof(struct page *), GFP_KERNEL); 216 if (!ow->pages) { 217 kfree(ow); 218 return -ENOMEM; 219 } 220 ow->bv = kcalloc(ow->maxpages, sizeof(struct bio_vec), GFP_KERNEL); 221 if (!ow->bv) { 222 kfree(ow->pages); 223 kfree(ow); 224 return -ENOMEM; 225 } 226 blk_start_plug(&plug); 227 ret = write_cache_pages(mapping, wbc, orangefs_writepages_callback, ow); 228 if (ow->npages) 229 ret = orangefs_writepages_work(ow, wbc); 230 blk_finish_plug(&plug); 231 kfree(ow->pages); 232 kfree(ow->bv); 233 kfree(ow); 234 return ret; 235 } 236 237 static int orangefs_launder_folio(struct folio *); 238 239 static void orangefs_readahead(struct readahead_control *rac) 240 { 241 loff_t offset; 242 struct iov_iter iter; 243 struct inode *inode = rac->mapping->host; 244 struct xarray *i_pages; 245 struct folio *folio; 246 loff_t new_start = readahead_pos(rac); 247 int ret; 248 size_t new_len = 0; 249 250 loff_t bytes_remaining = inode->i_size - readahead_pos(rac); 251 loff_t pages_remaining = bytes_remaining / PAGE_SIZE; 252 253 if (pages_remaining >= 1024) 254 new_len = 4194304; 255 else if (pages_remaining > readahead_count(rac)) 256 new_len = bytes_remaining; 257 258 if (new_len) 259 readahead_expand(rac, new_start, new_len); 260 261 offset = readahead_pos(rac); 262 i_pages = &rac->mapping->i_pages; 263 264 iov_iter_xarray(&iter, ITER_DEST, i_pages, offset, readahead_length(rac)); 265 266 /* read in the pages. */ 267 if ((ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, 268 &offset, &iter, readahead_length(rac), 269 inode->i_size, NULL, NULL, rac->file)) < 0) 270 gossip_debug(GOSSIP_FILE_DEBUG, 271 "%s: wait_for_direct_io failed. \n", __func__); 272 else 273 ret = 0; 274 275 /* clean up. */ 276 while ((folio = readahead_folio(rac))) { 277 if (!ret) 278 folio_mark_uptodate(folio); 279 folio_unlock(folio); 280 } 281 } 282 283 static int orangefs_read_folio(struct file *file, struct folio *folio) 284 { 285 struct inode *inode = folio->mapping->host; 286 struct iov_iter iter; 287 struct bio_vec bv; 288 ssize_t ret; 289 loff_t off; /* offset of this folio in the file */ 290 291 if (folio_test_dirty(folio)) 292 orangefs_launder_folio(folio); 293 294 off = folio_pos(folio); 295 bvec_set_folio(&bv, folio, folio_size(folio), 0); 296 iov_iter_bvec(&iter, ITER_DEST, &bv, 1, folio_size(folio)); 297 298 ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter, 299 folio_size(folio), inode->i_size, NULL, NULL, file); 300 /* this will only zero remaining unread portions of the folio data */ 301 iov_iter_zero(~0U, &iter); 302 /* takes care of potential aliasing */ 303 flush_dcache_folio(folio); 304 if (ret > 0) 305 ret = 0; 306 folio_end_read(folio, ret == 0); 307 return ret; 308 } 309 310 static int orangefs_write_begin(struct file *file, 311 struct address_space *mapping, loff_t pos, unsigned len, 312 struct page **pagep, void **fsdata) 313 { 314 struct orangefs_write_range *wr; 315 struct folio *folio; 316 struct page *page; 317 pgoff_t index; 318 int ret; 319 320 index = pos >> PAGE_SHIFT; 321 322 page = grab_cache_page_write_begin(mapping, index); 323 if (!page) 324 return -ENOMEM; 325 326 *pagep = page; 327 folio = page_folio(page); 328 329 if (folio_test_dirty(folio) && !folio_test_private(folio)) { 330 /* 331 * Should be impossible. If it happens, launder the page 332 * since we don't know what's dirty. This will WARN in 333 * orangefs_writepage_locked. 334 */ 335 ret = orangefs_launder_folio(folio); 336 if (ret) 337 return ret; 338 } 339 if (folio_test_private(folio)) { 340 struct orangefs_write_range *wr; 341 wr = folio_get_private(folio); 342 if (wr->pos + wr->len == pos && 343 uid_eq(wr->uid, current_fsuid()) && 344 gid_eq(wr->gid, current_fsgid())) { 345 wr->len += len; 346 goto okay; 347 } else { 348 ret = orangefs_launder_folio(folio); 349 if (ret) 350 return ret; 351 } 352 } 353 354 wr = kmalloc(sizeof *wr, GFP_KERNEL); 355 if (!wr) 356 return -ENOMEM; 357 358 wr->pos = pos; 359 wr->len = len; 360 wr->uid = current_fsuid(); 361 wr->gid = current_fsgid(); 362 folio_attach_private(folio, wr); 363 okay: 364 return 0; 365 } 366 367 static int orangefs_write_end(struct file *file, struct address_space *mapping, 368 loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) 369 { 370 struct inode *inode = page->mapping->host; 371 loff_t last_pos = pos + copied; 372 373 /* 374 * No need to use i_size_read() here, the i_size 375 * cannot change under us because we hold the i_mutex. 376 */ 377 if (last_pos > inode->i_size) 378 i_size_write(inode, last_pos); 379 380 /* zero the stale part of the page if we did a short copy */ 381 if (!PageUptodate(page)) { 382 unsigned from = pos & (PAGE_SIZE - 1); 383 if (copied < len) { 384 zero_user(page, from + copied, len - copied); 385 } 386 /* Set fully written pages uptodate. */ 387 if (pos == page_offset(page) && 388 (len == PAGE_SIZE || pos + len == inode->i_size)) { 389 zero_user_segment(page, from + copied, PAGE_SIZE); 390 SetPageUptodate(page); 391 } 392 } 393 394 set_page_dirty(page); 395 unlock_page(page); 396 put_page(page); 397 398 mark_inode_dirty_sync(file_inode(file)); 399 return copied; 400 } 401 402 static void orangefs_invalidate_folio(struct folio *folio, 403 size_t offset, size_t length) 404 { 405 struct orangefs_write_range *wr = folio_get_private(folio); 406 407 if (offset == 0 && length == PAGE_SIZE) { 408 kfree(folio_detach_private(folio)); 409 return; 410 /* write range entirely within invalidate range (or equal) */ 411 } else if (folio_pos(folio) + offset <= wr->pos && 412 wr->pos + wr->len <= folio_pos(folio) + offset + length) { 413 kfree(folio_detach_private(folio)); 414 /* XXX is this right? only caller in fs */ 415 folio_cancel_dirty(folio); 416 return; 417 /* invalidate range chops off end of write range */ 418 } else if (wr->pos < folio_pos(folio) + offset && 419 wr->pos + wr->len <= folio_pos(folio) + offset + length && 420 folio_pos(folio) + offset < wr->pos + wr->len) { 421 size_t x; 422 x = wr->pos + wr->len - (folio_pos(folio) + offset); 423 WARN_ON(x > wr->len); 424 wr->len -= x; 425 wr->uid = current_fsuid(); 426 wr->gid = current_fsgid(); 427 /* invalidate range chops off beginning of write range */ 428 } else if (folio_pos(folio) + offset <= wr->pos && 429 folio_pos(folio) + offset + length < wr->pos + wr->len && 430 wr->pos < folio_pos(folio) + offset + length) { 431 size_t x; 432 x = folio_pos(folio) + offset + length - wr->pos; 433 WARN_ON(x > wr->len); 434 wr->pos += x; 435 wr->len -= x; 436 wr->uid = current_fsuid(); 437 wr->gid = current_fsgid(); 438 /* invalidate range entirely within write range (punch hole) */ 439 } else if (wr->pos < folio_pos(folio) + offset && 440 folio_pos(folio) + offset + length < wr->pos + wr->len) { 441 /* XXX what do we do here... should not WARN_ON */ 442 WARN_ON(1); 443 /* punch hole */ 444 /* 445 * should we just ignore this and write it out anyway? 446 * it hardly makes sense 447 */ 448 return; 449 /* non-overlapping ranges */ 450 } else { 451 /* WARN if they do overlap */ 452 if (!((folio_pos(folio) + offset + length <= wr->pos) ^ 453 (wr->pos + wr->len <= folio_pos(folio) + offset))) { 454 WARN_ON(1); 455 printk("invalidate range offset %llu length %zu\n", 456 folio_pos(folio) + offset, length); 457 printk("write range offset %llu length %zu\n", 458 wr->pos, wr->len); 459 } 460 return; 461 } 462 463 /* 464 * Above there are returns where wr is freed or where we WARN. 465 * Thus the following runs if wr was modified above. 466 */ 467 468 orangefs_launder_folio(folio); 469 } 470 471 static bool orangefs_release_folio(struct folio *folio, gfp_t foo) 472 { 473 return !folio_test_private(folio); 474 } 475 476 static void orangefs_free_folio(struct folio *folio) 477 { 478 kfree(folio_detach_private(folio)); 479 } 480 481 static int orangefs_launder_folio(struct folio *folio) 482 { 483 int r = 0; 484 struct writeback_control wbc = { 485 .sync_mode = WB_SYNC_ALL, 486 .nr_to_write = 0, 487 }; 488 folio_wait_writeback(folio); 489 if (folio_clear_dirty_for_io(folio)) { 490 r = orangefs_writepage_locked(&folio->page, &wbc); 491 folio_end_writeback(folio); 492 } 493 return r; 494 } 495 496 static ssize_t orangefs_direct_IO(struct kiocb *iocb, 497 struct iov_iter *iter) 498 { 499 /* 500 * Comment from original do_readv_writev: 501 * Common entry point for read/write/readv/writev 502 * This function will dispatch it to either the direct I/O 503 * or buffered I/O path depending on the mount options and/or 504 * augmented/extended metadata attached to the file. 505 * Note: File extended attributes override any mount options. 506 */ 507 struct file *file = iocb->ki_filp; 508 loff_t pos = iocb->ki_pos; 509 enum ORANGEFS_io_type type = iov_iter_rw(iter) == WRITE ? 510 ORANGEFS_IO_WRITE : ORANGEFS_IO_READ; 511 loff_t *offset = &pos; 512 struct inode *inode = file->f_mapping->host; 513 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 514 struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; 515 size_t count = iov_iter_count(iter); 516 ssize_t total_count = 0; 517 ssize_t ret = -EINVAL; 518 519 gossip_debug(GOSSIP_FILE_DEBUG, 520 "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n", 521 __func__, 522 handle, 523 (int)count); 524 525 if (type == ORANGEFS_IO_WRITE) { 526 gossip_debug(GOSSIP_FILE_DEBUG, 527 "%s(%pU): proceeding with offset : %llu, " 528 "size %d\n", 529 __func__, 530 handle, 531 llu(*offset), 532 (int)count); 533 } 534 535 if (count == 0) { 536 ret = 0; 537 goto out; 538 } 539 540 while (iov_iter_count(iter)) { 541 size_t each_count = iov_iter_count(iter); 542 size_t amt_complete; 543 544 /* how much to transfer in this loop iteration */ 545 if (each_count > orangefs_bufmap_size_query()) 546 each_count = orangefs_bufmap_size_query(); 547 548 gossip_debug(GOSSIP_FILE_DEBUG, 549 "%s(%pU): size of each_count(%d)\n", 550 __func__, 551 handle, 552 (int)each_count); 553 gossip_debug(GOSSIP_FILE_DEBUG, 554 "%s(%pU): BEFORE wait_for_io: offset is %d\n", 555 __func__, 556 handle, 557 (int)*offset); 558 559 ret = wait_for_direct_io(type, inode, offset, iter, 560 each_count, 0, NULL, NULL, file); 561 gossip_debug(GOSSIP_FILE_DEBUG, 562 "%s(%pU): return from wait_for_io:%d\n", 563 __func__, 564 handle, 565 (int)ret); 566 567 if (ret < 0) 568 goto out; 569 570 *offset += ret; 571 total_count += ret; 572 amt_complete = ret; 573 574 gossip_debug(GOSSIP_FILE_DEBUG, 575 "%s(%pU): AFTER wait_for_io: offset is %d\n", 576 __func__, 577 handle, 578 (int)*offset); 579 580 /* 581 * if we got a short I/O operations, 582 * fall out and return what we got so far 583 */ 584 if (amt_complete < each_count) 585 break; 586 } /*end while */ 587 588 out: 589 if (total_count > 0) 590 ret = total_count; 591 if (ret > 0) { 592 if (type == ORANGEFS_IO_READ) { 593 file_accessed(file); 594 } else { 595 file_update_time(file); 596 if (*offset > i_size_read(inode)) 597 i_size_write(inode, *offset); 598 } 599 } 600 601 gossip_debug(GOSSIP_FILE_DEBUG, 602 "%s(%pU): Value(%d) returned.\n", 603 __func__, 604 handle, 605 (int)ret); 606 607 return ret; 608 } 609 610 /** ORANGEFS2 implementation of address space operations */ 611 static const struct address_space_operations orangefs_address_operations = { 612 .writepage = orangefs_writepage, 613 .readahead = orangefs_readahead, 614 .read_folio = orangefs_read_folio, 615 .writepages = orangefs_writepages, 616 .dirty_folio = filemap_dirty_folio, 617 .write_begin = orangefs_write_begin, 618 .write_end = orangefs_write_end, 619 .invalidate_folio = orangefs_invalidate_folio, 620 .release_folio = orangefs_release_folio, 621 .free_folio = orangefs_free_folio, 622 .launder_folio = orangefs_launder_folio, 623 .direct_IO = orangefs_direct_IO, 624 }; 625 626 vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf) 627 { 628 struct folio *folio = page_folio(vmf->page); 629 struct inode *inode = file_inode(vmf->vma->vm_file); 630 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 631 unsigned long *bitlock = &orangefs_inode->bitlock; 632 vm_fault_t ret; 633 struct orangefs_write_range *wr; 634 635 sb_start_pagefault(inode->i_sb); 636 637 if (wait_on_bit(bitlock, 1, TASK_KILLABLE)) { 638 ret = VM_FAULT_RETRY; 639 goto out; 640 } 641 642 folio_lock(folio); 643 if (folio_test_dirty(folio) && !folio_test_private(folio)) { 644 /* 645 * Should be impossible. If it happens, launder the folio 646 * since we don't know what's dirty. This will WARN in 647 * orangefs_writepage_locked. 648 */ 649 if (orangefs_launder_folio(folio)) { 650 ret = VM_FAULT_LOCKED|VM_FAULT_RETRY; 651 goto out; 652 } 653 } 654 if (folio_test_private(folio)) { 655 wr = folio_get_private(folio); 656 if (uid_eq(wr->uid, current_fsuid()) && 657 gid_eq(wr->gid, current_fsgid())) { 658 wr->pos = page_offset(vmf->page); 659 wr->len = PAGE_SIZE; 660 goto okay; 661 } else { 662 if (orangefs_launder_folio(folio)) { 663 ret = VM_FAULT_LOCKED|VM_FAULT_RETRY; 664 goto out; 665 } 666 } 667 } 668 wr = kmalloc(sizeof *wr, GFP_KERNEL); 669 if (!wr) { 670 ret = VM_FAULT_LOCKED|VM_FAULT_RETRY; 671 goto out; 672 } 673 wr->pos = page_offset(vmf->page); 674 wr->len = PAGE_SIZE; 675 wr->uid = current_fsuid(); 676 wr->gid = current_fsgid(); 677 folio_attach_private(folio, wr); 678 okay: 679 680 file_update_time(vmf->vma->vm_file); 681 if (folio->mapping != inode->i_mapping) { 682 folio_unlock(folio); 683 ret = VM_FAULT_LOCKED|VM_FAULT_NOPAGE; 684 goto out; 685 } 686 687 /* 688 * We mark the folio dirty already here so that when freeze is in 689 * progress, we are guaranteed that writeback during freezing will 690 * see the dirty folio and writeprotect it again. 691 */ 692 folio_mark_dirty(folio); 693 folio_wait_stable(folio); 694 ret = VM_FAULT_LOCKED; 695 out: 696 sb_end_pagefault(inode->i_sb); 697 return ret; 698 } 699 700 static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) 701 { 702 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 703 struct orangefs_kernel_op_s *new_op; 704 loff_t orig_size; 705 int ret = -EINVAL; 706 707 gossip_debug(GOSSIP_INODE_DEBUG, 708 "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n", 709 __func__, 710 get_khandle_from_ino(inode), 711 &orangefs_inode->refn.khandle, 712 orangefs_inode->refn.fs_id, 713 iattr->ia_size); 714 715 /* Ensure that we have a up to date size, so we know if it changed. */ 716 ret = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_SIZE); 717 if (ret == -ESTALE) 718 ret = -EIO; 719 if (ret) { 720 gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n", 721 __func__, ret); 722 return ret; 723 } 724 orig_size = i_size_read(inode); 725 726 /* This is truncate_setsize in a different order. */ 727 truncate_pagecache(inode, iattr->ia_size); 728 i_size_write(inode, iattr->ia_size); 729 if (iattr->ia_size > orig_size) 730 pagecache_isize_extended(inode, orig_size, iattr->ia_size); 731 732 new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE); 733 if (!new_op) 734 return -ENOMEM; 735 736 new_op->upcall.req.truncate.refn = orangefs_inode->refn; 737 new_op->upcall.req.truncate.size = (__s64) iattr->ia_size; 738 739 ret = service_operation(new_op, 740 __func__, 741 get_interruptible_flag(inode)); 742 743 /* 744 * the truncate has no downcall members to retrieve, but 745 * the status value tells us if it went through ok or not 746 */ 747 gossip_debug(GOSSIP_INODE_DEBUG, "%s: ret:%d:\n", __func__, ret); 748 749 op_release(new_op); 750 751 if (ret != 0) 752 return ret; 753 754 if (orig_size != i_size_read(inode)) 755 iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; 756 757 return ret; 758 } 759 760 int __orangefs_setattr(struct inode *inode, struct iattr *iattr) 761 { 762 int ret; 763 764 if (iattr->ia_valid & ATTR_MODE) { 765 if (iattr->ia_mode & (S_ISVTX)) { 766 if (is_root_handle(inode)) { 767 /* 768 * allow sticky bit to be set on root (since 769 * it shows up that way by default anyhow), 770 * but don't show it to the server 771 */ 772 iattr->ia_mode -= S_ISVTX; 773 } else { 774 gossip_debug(GOSSIP_UTILS_DEBUG, 775 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n"); 776 ret = -EINVAL; 777 goto out; 778 } 779 } 780 if (iattr->ia_mode & (S_ISUID)) { 781 gossip_debug(GOSSIP_UTILS_DEBUG, 782 "Attempting to set setuid bit (not supported); returning EINVAL.\n"); 783 ret = -EINVAL; 784 goto out; 785 } 786 } 787 788 if (iattr->ia_valid & ATTR_SIZE) { 789 ret = orangefs_setattr_size(inode, iattr); 790 if (ret) 791 goto out; 792 } 793 794 again: 795 spin_lock(&inode->i_lock); 796 if (ORANGEFS_I(inode)->attr_valid) { 797 if (uid_eq(ORANGEFS_I(inode)->attr_uid, current_fsuid()) && 798 gid_eq(ORANGEFS_I(inode)->attr_gid, current_fsgid())) { 799 ORANGEFS_I(inode)->attr_valid = iattr->ia_valid; 800 } else { 801 spin_unlock(&inode->i_lock); 802 write_inode_now(inode, 1); 803 goto again; 804 } 805 } else { 806 ORANGEFS_I(inode)->attr_valid = iattr->ia_valid; 807 ORANGEFS_I(inode)->attr_uid = current_fsuid(); 808 ORANGEFS_I(inode)->attr_gid = current_fsgid(); 809 } 810 setattr_copy(&nop_mnt_idmap, inode, iattr); 811 spin_unlock(&inode->i_lock); 812 mark_inode_dirty(inode); 813 814 ret = 0; 815 out: 816 return ret; 817 } 818 819 int __orangefs_setattr_mode(struct dentry *dentry, struct iattr *iattr) 820 { 821 int ret; 822 struct inode *inode = d_inode(dentry); 823 824 ret = __orangefs_setattr(inode, iattr); 825 /* change mode on a file that has ACLs */ 826 if (!ret && (iattr->ia_valid & ATTR_MODE)) 827 ret = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode); 828 return ret; 829 } 830 831 /* 832 * Change attributes of an object referenced by dentry. 833 */ 834 int orangefs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 835 struct iattr *iattr) 836 { 837 int ret; 838 gossip_debug(GOSSIP_INODE_DEBUG, "__orangefs_setattr: called on %pd\n", 839 dentry); 840 ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr); 841 if (ret) 842 goto out; 843 ret = __orangefs_setattr_mode(dentry, iattr); 844 sync_inode_metadata(d_inode(dentry), 1); 845 out: 846 gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n", 847 ret); 848 return ret; 849 } 850 851 /* 852 * Obtain attributes of an object given a dentry 853 */ 854 int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path, 855 struct kstat *stat, u32 request_mask, unsigned int flags) 856 { 857 int ret; 858 struct inode *inode = path->dentry->d_inode; 859 860 gossip_debug(GOSSIP_INODE_DEBUG, 861 "orangefs_getattr: called on %pd mask %u\n", 862 path->dentry, request_mask); 863 864 ret = orangefs_inode_getattr(inode, 865 request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0); 866 if (ret == 0) { 867 generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); 868 869 /* override block size reported to stat */ 870 if (!(request_mask & STATX_SIZE)) 871 stat->result_mask &= ~STATX_SIZE; 872 873 generic_fill_statx_attr(inode, stat); 874 } 875 return ret; 876 } 877 878 int orangefs_permission(struct mnt_idmap *idmap, 879 struct inode *inode, int mask) 880 { 881 int ret; 882 883 if (mask & MAY_NOT_BLOCK) 884 return -ECHILD; 885 886 gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__); 887 888 /* Make sure the permission (and other common attrs) are up to date. */ 889 ret = orangefs_inode_getattr(inode, 0); 890 if (ret < 0) 891 return ret; 892 893 return generic_permission(&nop_mnt_idmap, inode, mask); 894 } 895 896 int orangefs_update_time(struct inode *inode, int flags) 897 { 898 struct iattr iattr; 899 900 gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n", 901 get_khandle_from_ino(inode)); 902 flags = generic_update_time(inode, flags); 903 memset(&iattr, 0, sizeof iattr); 904 if (flags & S_ATIME) 905 iattr.ia_valid |= ATTR_ATIME; 906 if (flags & S_CTIME) 907 iattr.ia_valid |= ATTR_CTIME; 908 if (flags & S_MTIME) 909 iattr.ia_valid |= ATTR_MTIME; 910 return __orangefs_setattr(inode, &iattr); 911 } 912 913 static int orangefs_fileattr_get(struct dentry *dentry, struct fileattr *fa) 914 { 915 u64 val = 0; 916 int ret; 917 918 gossip_debug(GOSSIP_FILE_DEBUG, "%s: called on %pd\n", __func__, 919 dentry); 920 921 ret = orangefs_inode_getxattr(d_inode(dentry), 922 "user.pvfs2.meta_hint", 923 &val, sizeof(val)); 924 if (ret < 0 && ret != -ENODATA) 925 return ret; 926 927 gossip_debug(GOSSIP_FILE_DEBUG, "%s: flags=%u\n", __func__, (u32) val); 928 929 fileattr_fill_flags(fa, val); 930 return 0; 931 } 932 933 static int orangefs_fileattr_set(struct mnt_idmap *idmap, 934 struct dentry *dentry, struct fileattr *fa) 935 { 936 u64 val = 0; 937 938 gossip_debug(GOSSIP_FILE_DEBUG, "%s: called on %pd\n", __func__, 939 dentry); 940 /* 941 * ORANGEFS_MIRROR_FL is set internally when the mirroring mode is 942 * turned on for a file. The user is not allowed to turn on this bit, 943 * but the bit is present if the user first gets the flags and then 944 * updates the flags with some new settings. So, we ignore it in the 945 * following edit. bligon. 946 */ 947 if (fileattr_has_fsx(fa) || 948 (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL | ORANGEFS_MIRROR_FL))) { 949 gossip_err("%s: only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n", 950 __func__); 951 return -EOPNOTSUPP; 952 } 953 val = fa->flags; 954 gossip_debug(GOSSIP_FILE_DEBUG, "%s: flags=%u\n", __func__, (u32) val); 955 return orangefs_inode_setxattr(d_inode(dentry), 956 "user.pvfs2.meta_hint", 957 &val, sizeof(val), 0); 958 } 959 960 /* ORANGEFS2 implementation of VFS inode operations for files */ 961 static const struct inode_operations orangefs_file_inode_operations = { 962 .get_inode_acl = orangefs_get_acl, 963 .set_acl = orangefs_set_acl, 964 .setattr = orangefs_setattr, 965 .getattr = orangefs_getattr, 966 .listxattr = orangefs_listxattr, 967 .permission = orangefs_permission, 968 .update_time = orangefs_update_time, 969 .fileattr_get = orangefs_fileattr_get, 970 .fileattr_set = orangefs_fileattr_set, 971 }; 972 973 static int orangefs_init_iops(struct inode *inode) 974 { 975 inode->i_mapping->a_ops = &orangefs_address_operations; 976 977 switch (inode->i_mode & S_IFMT) { 978 case S_IFREG: 979 inode->i_op = &orangefs_file_inode_operations; 980 inode->i_fop = &orangefs_file_operations; 981 break; 982 case S_IFLNK: 983 inode->i_op = &orangefs_symlink_inode_operations; 984 break; 985 case S_IFDIR: 986 inode->i_op = &orangefs_dir_inode_operations; 987 inode->i_fop = &orangefs_dir_operations; 988 break; 989 default: 990 gossip_debug(GOSSIP_INODE_DEBUG, 991 "%s: unsupported mode\n", 992 __func__); 993 return -EINVAL; 994 } 995 996 return 0; 997 } 998 999 /* 1000 * Given an ORANGEFS object identifier (fsid, handle), convert it into 1001 * a ino_t type that will be used as a hash-index from where the handle will 1002 * be searched for in the VFS hash table of inodes. 1003 */ 1004 static inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref) 1005 { 1006 if (!ref) 1007 return 0; 1008 return orangefs_khandle_to_ino(&(ref->khandle)); 1009 } 1010 1011 /* 1012 * Called to set up an inode from iget5_locked. 1013 */ 1014 static int orangefs_set_inode(struct inode *inode, void *data) 1015 { 1016 struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data; 1017 ORANGEFS_I(inode)->refn.fs_id = ref->fs_id; 1018 ORANGEFS_I(inode)->refn.khandle = ref->khandle; 1019 ORANGEFS_I(inode)->attr_valid = 0; 1020 hash_init(ORANGEFS_I(inode)->xattr_cache); 1021 ORANGEFS_I(inode)->mapping_time = jiffies - 1; 1022 ORANGEFS_I(inode)->bitlock = 0; 1023 return 0; 1024 } 1025 1026 /* 1027 * Called to determine if handles match. 1028 */ 1029 static int orangefs_test_inode(struct inode *inode, void *data) 1030 { 1031 struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data; 1032 struct orangefs_inode_s *orangefs_inode = NULL; 1033 1034 orangefs_inode = ORANGEFS_I(inode); 1035 /* test handles and fs_ids... */ 1036 return (!ORANGEFS_khandle_cmp(&(orangefs_inode->refn.khandle), 1037 &(ref->khandle)) && 1038 orangefs_inode->refn.fs_id == ref->fs_id); 1039 } 1040 1041 /* 1042 * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS 1043 * file handle. 1044 * 1045 * @sb: the file system super block instance. 1046 * @ref: The ORANGEFS object for which we are trying to locate an inode. 1047 */ 1048 struct inode *orangefs_iget(struct super_block *sb, 1049 struct orangefs_object_kref *ref) 1050 { 1051 struct inode *inode = NULL; 1052 unsigned long hash; 1053 int error; 1054 1055 hash = orangefs_handle_hash(ref); 1056 inode = iget5_locked(sb, 1057 hash, 1058 orangefs_test_inode, 1059 orangefs_set_inode, 1060 ref); 1061 1062 if (!inode) 1063 return ERR_PTR(-ENOMEM); 1064 1065 if (!(inode->i_state & I_NEW)) 1066 return inode; 1067 1068 error = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_NEW); 1069 if (error) { 1070 iget_failed(inode); 1071 return ERR_PTR(error); 1072 } 1073 1074 inode->i_ino = hash; /* needed for stat etc */ 1075 orangefs_init_iops(inode); 1076 unlock_new_inode(inode); 1077 1078 gossip_debug(GOSSIP_INODE_DEBUG, 1079 "iget handle %pU, fsid %d hash %ld i_ino %lu\n", 1080 &ref->khandle, 1081 ref->fs_id, 1082 hash, 1083 inode->i_ino); 1084 1085 return inode; 1086 } 1087 1088 /* 1089 * Allocate an inode for a newly created file and insert it into the inode hash. 1090 */ 1091 struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, 1092 umode_t mode, dev_t dev, struct orangefs_object_kref *ref) 1093 { 1094 struct posix_acl *acl = NULL, *default_acl = NULL; 1095 unsigned long hash = orangefs_handle_hash(ref); 1096 struct inode *inode; 1097 int error; 1098 1099 gossip_debug(GOSSIP_INODE_DEBUG, 1100 "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n", 1101 __func__, 1102 sb, 1103 MAJOR(dev), 1104 MINOR(dev), 1105 mode); 1106 1107 inode = new_inode(sb); 1108 if (!inode) 1109 return ERR_PTR(-ENOMEM); 1110 1111 error = posix_acl_create(dir, &mode, &default_acl, &acl); 1112 if (error) 1113 goto out_iput; 1114 1115 orangefs_set_inode(inode, ref); 1116 inode->i_ino = hash; /* needed for stat etc */ 1117 1118 error = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_NEW); 1119 if (error) 1120 goto out_iput; 1121 1122 orangefs_init_iops(inode); 1123 inode->i_rdev = dev; 1124 1125 if (default_acl) { 1126 error = __orangefs_set_acl(inode, default_acl, 1127 ACL_TYPE_DEFAULT); 1128 if (error) 1129 goto out_iput; 1130 } 1131 1132 if (acl) { 1133 error = __orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS); 1134 if (error) 1135 goto out_iput; 1136 } 1137 1138 error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref); 1139 if (error < 0) 1140 goto out_iput; 1141 1142 gossip_debug(GOSSIP_INODE_DEBUG, 1143 "Initializing ACL's for inode %pU\n", 1144 get_khandle_from_ino(inode)); 1145 if (mode != inode->i_mode) { 1146 struct iattr iattr = { 1147 .ia_mode = mode, 1148 .ia_valid = ATTR_MODE, 1149 }; 1150 inode->i_mode = mode; 1151 __orangefs_setattr(inode, &iattr); 1152 __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); 1153 } 1154 posix_acl_release(acl); 1155 posix_acl_release(default_acl); 1156 return inode; 1157 1158 out_iput: 1159 iput(inode); 1160 posix_acl_release(acl); 1161 posix_acl_release(default_acl); 1162 return ERR_PTR(error); 1163 } 1164