// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
		if (xas_retry(&xas, folio))
			continue;
		xas_pause(&xas);
		rcu_read_unlock();
		folio_lock(folio);
		folio_clear_dirty_for_io(folio);
		folio_unlock(folio);
		rcu_read_lock();
	}

	rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_detach_private(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_set_error(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
 */
void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		filemap_dirty_folio(folio->mapping, folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Mark all open files on the tree connection as invalid, since they were
 * closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file = NULL;
	struct list_head *tmp;
	struct list_head *tmp1;

	/* only send once per connect */
	spin_lock(&tcon->tc_lock);
	if (tcon->need_reconnect)
		tcon->status = TID_NEED_RECON;

	if (tcon->status != TID_NEED_RECON) {
		spin_unlock(&tcon->tc_lock);
		return;
	}
	tcon->status = TID_IN_FILES_INVALIDATE;
	spin_unlock(&tcon->tc_lock);

	/* list all files open on tree connection and mark them invalid */
	spin_lock(&tcon->open_file_lock);
	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		open_file->invalidHandle = true;
		open_file->oplock_break_cancelled = true;
	}
	spin_unlock(&tcon->open_file_lock);

	invalidate_all_cached_dirs(tcon);
	spin_lock(&tcon->tc_lock);
	if (tcon->status == TID_IN_FILES_INVALIDATE)
		tcon->status = TID_NEED_TCON;
	spin_unlock(&tcon->tc_lock);

	/*
	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
	 * to this tcon.
	 */
}

static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can
		   cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open.
Ignoring O_EXCL\n", 245 current->comm, current->tgid); 246 247 if (flags & O_TRUNC) 248 posix_flags |= SMB_O_TRUNC; 249 /* be safe and imply O_SYNC for O_DSYNC */ 250 if (flags & O_DSYNC) 251 posix_flags |= SMB_O_SYNC; 252 if (flags & O_DIRECTORY) 253 posix_flags |= SMB_O_DIRECTORY; 254 if (flags & O_NOFOLLOW) 255 posix_flags |= SMB_O_NOFOLLOW; 256 if (flags & O_DIRECT) 257 posix_flags |= SMB_O_DIRECT; 258 259 return posix_flags; 260 } 261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 262 263 static inline int cifs_get_disposition(unsigned int flags) 264 { 265 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) 266 return FILE_CREATE; 267 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) 268 return FILE_OVERWRITE_IF; 269 else if ((flags & O_CREAT) == O_CREAT) 270 return FILE_OPEN_IF; 271 else if ((flags & O_TRUNC) == O_TRUNC) 272 return FILE_OVERWRITE; 273 else 274 return FILE_OPEN; 275 } 276 277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 278 int cifs_posix_open(const char *full_path, struct inode **pinode, 279 struct super_block *sb, int mode, unsigned int f_flags, 280 __u32 *poplock, __u16 *pnetfid, unsigned int xid) 281 { 282 int rc; 283 FILE_UNIX_BASIC_INFO *presp_data; 284 __u32 posix_flags = 0; 285 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 286 struct cifs_fattr fattr; 287 struct tcon_link *tlink; 288 struct cifs_tcon *tcon; 289 290 cifs_dbg(FYI, "posix open %s\n", full_path); 291 292 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); 293 if (presp_data == NULL) 294 return -ENOMEM; 295 296 tlink = cifs_sb_tlink(cifs_sb); 297 if (IS_ERR(tlink)) { 298 rc = PTR_ERR(tlink); 299 goto posix_open_ret; 300 } 301 302 tcon = tlink_tcon(tlink); 303 mode &= ~current_umask(); 304 305 posix_flags = cifs_posix_convert_flags(f_flags); 306 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data, 307 poplock, full_path, cifs_sb->local_nls, 308 cifs_remap(cifs_sb)); 309 cifs_put_tlink(tlink); 310 311 if (rc) 312 goto posix_open_ret; 313 314 if (presp_data->Type == cpu_to_le32(-1)) 315 goto posix_open_ret; /* open ok, caller does qpathinfo */ 316 317 if (!pinode) 318 goto posix_open_ret; /* caller does not need info */ 319 320 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb); 321 322 /* get new inode and set it up */ 323 if (*pinode == NULL) { 324 cifs_fill_uniqueid(sb, &fattr); 325 *pinode = cifs_iget(sb, &fattr); 326 if (!*pinode) { 327 rc = -ENOMEM; 328 goto posix_open_ret; 329 } 330 } else { 331 cifs_revalidate_mapping(*pinode); 332 rc = cifs_fattr_to_inode(*pinode, &fattr, false); 333 } 334 335 posix_open_ret: 336 kfree(presp_data); 337 return rc; 338 } 339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 340 341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, 342 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock, 343 struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf) 344 { 345 int rc; 346 int desired_access; 347 int disposition; 348 int create_options = CREATE_NOT_DIR; 349 struct TCP_Server_Info *server = tcon->ses->server; 350 struct cifs_open_parms oparms; 351 352 if (!server->ops->open) 353 return -ENOSYS; 354 355 desired_access = cifs_convert_flags(f_flags); 356 357 /********************************************************************* 358 * open flag mapping table: 359 * 360 * POSIX Flag CIFS Disposition 361 * ---------- ---------------- 362 * O_CREAT FILE_OPEN_IF 363 * O_CREAT | O_EXCL FILE_CREATE 364 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF 365 * O_TRUNC 
FILE_OVERWRITE 366 * none of the above FILE_OPEN 367 * 368 * Note that there is not a direct match between disposition 369 * FILE_SUPERSEDE (ie create whether or not file exists although 370 * O_CREAT | O_TRUNC is similar but truncates the existing 371 * file rather than creating a new file as FILE_SUPERSEDE does 372 * (which uses the attributes / metadata passed in on open call) 373 *? 374 *? O_SYNC is a reasonable match to CIFS writethrough flag 375 *? and the read write flags match reasonably. O_LARGEFILE 376 *? is irrelevant because largefile support is always used 377 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, 378 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation 379 *********************************************************************/ 380 381 disposition = cifs_get_disposition(f_flags); 382 383 /* BB pass O_SYNC flag through on file attributes .. BB */ 384 385 /* O_SYNC also has bit for O_DSYNC so following check picks up either */ 386 if (f_flags & O_SYNC) 387 create_options |= CREATE_WRITE_THROUGH; 388 389 if (f_flags & O_DIRECT) 390 create_options |= CREATE_NO_BUFFER; 391 392 oparms = (struct cifs_open_parms) { 393 .tcon = tcon, 394 .cifs_sb = cifs_sb, 395 .desired_access = desired_access, 396 .create_options = cifs_create_options(cifs_sb, create_options), 397 .disposition = disposition, 398 .path = full_path, 399 .fid = fid, 400 }; 401 402 rc = server->ops->open(xid, &oparms, oplock, buf); 403 if (rc) 404 return rc; 405 406 /* TODO: Add support for calling posix query info but with passing in fid */ 407 if (tcon->unix_ext) 408 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, 409 xid); 410 else 411 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, 412 xid, fid); 413 414 if (rc) { 415 server->ops->close(xid, tcon, fid); 416 if (rc == -ESTALE) 417 rc = -EOPENSTALE; 418 } 419 420 return rc; 421 } 422 423 static bool 424 cifs_has_mand_locks(struct cifsInodeInfo *cinode) 425 { 426 struct cifs_fid_locks *cur; 427 bool has_locks = false; 428 429 down_read(&cinode->lock_sem); 430 list_for_each_entry(cur, &cinode->llist, llist) { 431 if (!list_empty(&cur->locks)) { 432 has_locks = true; 433 break; 434 } 435 } 436 up_read(&cinode->lock_sem); 437 return has_locks; 438 } 439 440 void 441 cifs_down_write(struct rw_semaphore *sem) 442 { 443 while (!down_write_trylock(sem)) 444 msleep(10); 445 } 446 447 static void cifsFileInfo_put_work(struct work_struct *work); 448 449 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, 450 struct tcon_link *tlink, __u32 oplock, 451 const char *symlink_target) 452 { 453 struct dentry *dentry = file_dentry(file); 454 struct inode *inode = d_inode(dentry); 455 struct cifsInodeInfo *cinode = CIFS_I(inode); 456 struct cifsFileInfo *cfile; 457 struct cifs_fid_locks *fdlocks; 458 struct cifs_tcon *tcon = tlink_tcon(tlink); 459 struct TCP_Server_Info *server = tcon->ses->server; 460 461 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 462 if (cfile == NULL) 463 return cfile; 464 465 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL); 466 if (!fdlocks) { 467 kfree(cfile); 468 return NULL; 469 } 470 471 if (symlink_target) { 472 cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL); 473 if (!cfile->symlink_target) { 474 kfree(fdlocks); 475 kfree(cfile); 476 return NULL; 477 } 478 } 479 480 INIT_LIST_HEAD(&fdlocks->locks); 481 fdlocks->cfile = cfile; 482 cfile->llist = fdlocks; 483 484 cfile->count = 1; 485 cfile->pid = current->tgid; 486 cfile->uid = 
current_fsuid(); 487 cfile->dentry = dget(dentry); 488 cfile->f_flags = file->f_flags; 489 cfile->status_file_deleted = false; 490 cfile->invalidHandle = false; 491 cfile->deferred_close_scheduled = false; 492 cfile->tlink = cifs_get_tlink(tlink); 493 INIT_WORK(&cfile->oplock_break, cifs_oplock_break); 494 INIT_WORK(&cfile->put, cifsFileInfo_put_work); 495 INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); 496 mutex_init(&cfile->fh_mutex); 497 spin_lock_init(&cfile->file_info_lock); 498 499 cifs_sb_active(inode->i_sb); 500 501 /* 502 * If the server returned a read oplock and we have mandatory brlocks, 503 * set oplock level to None. 504 */ 505 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 506 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 507 oplock = 0; 508 } 509 510 cifs_down_write(&cinode->lock_sem); 511 list_add(&fdlocks->llist, &cinode->llist); 512 up_write(&cinode->lock_sem); 513 514 spin_lock(&tcon->open_file_lock); 515 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) 516 oplock = fid->pending_open->oplock; 517 list_del(&fid->pending_open->olist); 518 519 fid->purge_cache = false; 520 server->ops->set_fid(cfile, fid, oplock); 521 522 list_add(&cfile->tlist, &tcon->openFileList); 523 atomic_inc(&tcon->num_local_opens); 524 525 /* if readable file instance put first in list*/ 526 spin_lock(&cinode->open_file_lock); 527 if (file->f_mode & FMODE_READ) 528 list_add(&cfile->flist, &cinode->openFileList); 529 else 530 list_add_tail(&cfile->flist, &cinode->openFileList); 531 spin_unlock(&cinode->open_file_lock); 532 spin_unlock(&tcon->open_file_lock); 533 534 if (fid->purge_cache) 535 cifs_zap_mapping(inode); 536 537 file->private_data = cfile; 538 return cfile; 539 } 540 541 struct cifsFileInfo * 542 cifsFileInfo_get(struct cifsFileInfo *cifs_file) 543 { 544 spin_lock(&cifs_file->file_info_lock); 545 cifsFileInfo_get_locked(cifs_file); 546 spin_unlock(&cifs_file->file_info_lock); 547 return cifs_file; 548 } 549 550 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) 551 { 552 struct inode *inode = d_inode(cifs_file->dentry); 553 struct cifsInodeInfo *cifsi = CIFS_I(inode); 554 struct cifsLockInfo *li, *tmp; 555 struct super_block *sb = inode->i_sb; 556 557 /* 558 * Delete any outstanding lock records. We'll lose them when the file 559 * is closed anyway. 560 */ 561 cifs_down_write(&cifsi->lock_sem); 562 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { 563 list_del(&li->llist); 564 cifs_del_lock_waiters(li); 565 kfree(li); 566 } 567 list_del(&cifs_file->llist->llist); 568 kfree(cifs_file->llist); 569 up_write(&cifsi->lock_sem); 570 571 cifs_put_tlink(cifs_file->tlink); 572 dput(cifs_file->dentry); 573 cifs_sb_deactive(sb); 574 kfree(cifs_file->symlink_target); 575 kfree(cifs_file); 576 } 577 578 static void cifsFileInfo_put_work(struct work_struct *work) 579 { 580 struct cifsFileInfo *cifs_file = container_of(work, 581 struct cifsFileInfo, put); 582 583 cifsFileInfo_put_final(cifs_file); 584 } 585 586 /** 587 * cifsFileInfo_put - release a reference of file priv data 588 * 589 * Always potentially wait for oplock handler. See _cifsFileInfo_put(). 
590 * 591 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file 592 */ 593 void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 594 { 595 _cifsFileInfo_put(cifs_file, true, true); 596 } 597 598 /** 599 * _cifsFileInfo_put - release a reference of file priv data 600 * 601 * This may involve closing the filehandle @cifs_file out on the 602 * server. Must be called without holding tcon->open_file_lock, 603 * cinode->open_file_lock and cifs_file->file_info_lock. 604 * 605 * If @wait_for_oplock_handler is true and we are releasing the last 606 * reference, wait for any running oplock break handler of the file 607 * and cancel any pending one. 608 * 609 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file 610 * @wait_oplock_handler: must be false if called from oplock_break_handler 611 * @offload: not offloaded on close and oplock breaks 612 * 613 */ 614 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, 615 bool wait_oplock_handler, bool offload) 616 { 617 struct inode *inode = d_inode(cifs_file->dentry); 618 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); 619 struct TCP_Server_Info *server = tcon->ses->server; 620 struct cifsInodeInfo *cifsi = CIFS_I(inode); 621 struct super_block *sb = inode->i_sb; 622 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 623 struct cifs_fid fid = {}; 624 struct cifs_pending_open open; 625 bool oplock_break_cancelled; 626 627 spin_lock(&tcon->open_file_lock); 628 spin_lock(&cifsi->open_file_lock); 629 spin_lock(&cifs_file->file_info_lock); 630 if (--cifs_file->count > 0) { 631 spin_unlock(&cifs_file->file_info_lock); 632 spin_unlock(&cifsi->open_file_lock); 633 spin_unlock(&tcon->open_file_lock); 634 return; 635 } 636 spin_unlock(&cifs_file->file_info_lock); 637 638 if (server->ops->get_lease_key) 639 server->ops->get_lease_key(inode, &fid); 640 641 /* store open in pending opens to make sure we don't miss lease break */ 642 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); 643 644 /* remove it from the lists */ 645 list_del(&cifs_file->flist); 646 list_del(&cifs_file->tlist); 647 atomic_dec(&tcon->num_local_opens); 648 649 if (list_empty(&cifsi->openFileList)) { 650 cifs_dbg(FYI, "closing last open instance for inode %p\n", 651 d_inode(cifs_file->dentry)); 652 /* 653 * In strict cache mode we need invalidate mapping on the last 654 * close because it may cause a error when we open this file 655 * again and get at least level II oplock. 656 */ 657 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) 658 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags); 659 cifs_set_oplock_level(cifsi, 0); 660 } 661 662 spin_unlock(&cifsi->open_file_lock); 663 spin_unlock(&tcon->open_file_lock); 664 665 oplock_break_cancelled = wait_oplock_handler ? 
666 cancel_work_sync(&cifs_file->oplock_break) : false; 667 668 if (!tcon->need_reconnect && !cifs_file->invalidHandle) { 669 struct TCP_Server_Info *server = tcon->ses->server; 670 unsigned int xid; 671 672 xid = get_xid(); 673 if (server->ops->close_getattr) 674 server->ops->close_getattr(xid, tcon, cifs_file); 675 else if (server->ops->close) 676 server->ops->close(xid, tcon, &cifs_file->fid); 677 _free_xid(xid); 678 } 679 680 if (oplock_break_cancelled) 681 cifs_done_oplock_break(cifsi); 682 683 cifs_del_pending_open(&open); 684 685 if (offload) 686 queue_work(fileinfo_put_wq, &cifs_file->put); 687 else 688 cifsFileInfo_put_final(cifs_file); 689 } 690 691 int cifs_open(struct inode *inode, struct file *file) 692 693 { 694 int rc = -EACCES; 695 unsigned int xid; 696 __u32 oplock; 697 struct cifs_sb_info *cifs_sb; 698 struct TCP_Server_Info *server; 699 struct cifs_tcon *tcon; 700 struct tcon_link *tlink; 701 struct cifsFileInfo *cfile = NULL; 702 void *page; 703 const char *full_path; 704 bool posix_open_ok = false; 705 struct cifs_fid fid = {}; 706 struct cifs_pending_open open; 707 struct cifs_open_info_data data = {}; 708 709 xid = get_xid(); 710 711 cifs_sb = CIFS_SB(inode->i_sb); 712 if (unlikely(cifs_forced_shutdown(cifs_sb))) { 713 free_xid(xid); 714 return -EIO; 715 } 716 717 tlink = cifs_sb_tlink(cifs_sb); 718 if (IS_ERR(tlink)) { 719 free_xid(xid); 720 return PTR_ERR(tlink); 721 } 722 tcon = tlink_tcon(tlink); 723 server = tcon->ses->server; 724 725 page = alloc_dentry_path(); 726 full_path = build_path_from_dentry(file_dentry(file), page); 727 if (IS_ERR(full_path)) { 728 rc = PTR_ERR(full_path); 729 goto out; 730 } 731 732 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", 733 inode, file->f_flags, full_path); 734 735 if (file->f_flags & O_DIRECT && 736 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { 737 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 738 file->f_op = &cifs_file_direct_nobrl_ops; 739 else 740 file->f_op = &cifs_file_direct_ops; 741 } 742 743 /* Get the cached handle as SMB2 close is deferred */ 744 rc = cifs_get_readable_path(tcon, full_path, &cfile); 745 if (rc == 0) { 746 if (file->f_flags == cfile->f_flags) { 747 file->private_data = cfile; 748 spin_lock(&CIFS_I(inode)->deferred_lock); 749 cifs_del_deferred_close(cfile); 750 spin_unlock(&CIFS_I(inode)->deferred_lock); 751 goto use_cache; 752 } else { 753 _cifsFileInfo_put(cfile, true, false); 754 } 755 } 756 757 if (server->oplocks) 758 oplock = REQ_OPLOCK; 759 else 760 oplock = 0; 761 762 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 763 if (!tcon->broken_posix_open && tcon->unix_ext && 764 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & 765 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 766 /* can not refresh inode info since size could be stale */ 767 rc = cifs_posix_open(full_path, &inode, inode->i_sb, 768 cifs_sb->ctx->file_mode /* ignored */, 769 file->f_flags, &oplock, &fid.netfid, xid); 770 if (rc == 0) { 771 cifs_dbg(FYI, "posix open succeeded\n"); 772 posix_open_ok = true; 773 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 774 if (tcon->ses->serverNOS) 775 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. 
Check if server update available.\n", 776 tcon->ses->ip_addr, 777 tcon->ses->serverNOS); 778 tcon->broken_posix_open = true; 779 } else if ((rc != -EIO) && (rc != -EREMOTE) && 780 (rc != -EOPNOTSUPP)) /* path not found or net err */ 781 goto out; 782 /* 783 * Else fallthrough to retry open the old way on network i/o 784 * or DFS errors. 785 */ 786 } 787 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 788 789 if (server->ops->get_lease_key) 790 server->ops->get_lease_key(inode, &fid); 791 792 cifs_add_pending_open(&fid, tlink, &open); 793 794 if (!posix_open_ok) { 795 if (server->ops->get_lease_key) 796 server->ops->get_lease_key(inode, &fid); 797 798 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid, 799 xid, &data); 800 if (rc) { 801 cifs_del_pending_open(&open); 802 goto out; 803 } 804 } 805 806 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target); 807 if (cfile == NULL) { 808 if (server->ops->close) 809 server->ops->close(xid, tcon, &fid); 810 cifs_del_pending_open(&open); 811 rc = -ENOMEM; 812 goto out; 813 } 814 815 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 816 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { 817 /* 818 * Time to set mode which we can not set earlier due to 819 * problems creating new read-only files. 820 */ 821 struct cifs_unix_set_info_args args = { 822 .mode = inode->i_mode, 823 .uid = INVALID_UID, /* no change */ 824 .gid = INVALID_GID, /* no change */ 825 .ctime = NO_CHANGE_64, 826 .atime = NO_CHANGE_64, 827 .mtime = NO_CHANGE_64, 828 .device = 0, 829 }; 830 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid, 831 cfile->pid); 832 } 833 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 834 835 use_cache: 836 fscache_use_cookie(cifs_inode_cookie(file_inode(file)), 837 file->f_mode & FMODE_WRITE); 838 if (file->f_flags & O_DIRECT && 839 (!((file->f_flags & O_ACCMODE) != O_RDONLY) || 840 file->f_flags & O_APPEND)) 841 cifs_invalidate_cache(file_inode(file), 842 FSCACHE_INVAL_DIO_WRITE); 843 844 out: 845 free_dentry_path(page); 846 free_xid(xid); 847 cifs_put_tlink(tlink); 848 cifs_free_open_info(&data); 849 return rc; 850 } 851 852 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 853 static int cifs_push_posix_locks(struct cifsFileInfo *cfile); 854 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 855 856 /* 857 * Try to reacquire byte range locks that were released when session 858 * to server was lost. 
859 */ 860 static int 861 cifs_relock_file(struct cifsFileInfo *cfile) 862 { 863 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 864 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 865 int rc = 0; 866 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 867 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 868 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 869 870 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); 871 if (cinode->can_cache_brlcks) { 872 /* can cache locks - no need to relock */ 873 up_read(&cinode->lock_sem); 874 return rc; 875 } 876 877 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 878 if (cap_unix(tcon->ses) && 879 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 880 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 881 rc = cifs_push_posix_locks(cfile); 882 else 883 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 884 rc = tcon->ses->server->ops->push_mand_locks(cfile); 885 886 up_read(&cinode->lock_sem); 887 return rc; 888 } 889 890 static int 891 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) 892 { 893 int rc = -EACCES; 894 unsigned int xid; 895 __u32 oplock; 896 struct cifs_sb_info *cifs_sb; 897 struct cifs_tcon *tcon; 898 struct TCP_Server_Info *server; 899 struct cifsInodeInfo *cinode; 900 struct inode *inode; 901 void *page; 902 const char *full_path; 903 int desired_access; 904 int disposition = FILE_OPEN; 905 int create_options = CREATE_NOT_DIR; 906 struct cifs_open_parms oparms; 907 908 xid = get_xid(); 909 mutex_lock(&cfile->fh_mutex); 910 if (!cfile->invalidHandle) { 911 mutex_unlock(&cfile->fh_mutex); 912 free_xid(xid); 913 return 0; 914 } 915 916 inode = d_inode(cfile->dentry); 917 cifs_sb = CIFS_SB(inode->i_sb); 918 tcon = tlink_tcon(cfile->tlink); 919 server = tcon->ses->server; 920 921 /* 922 * Can not grab rename sem here because various ops, including those 923 * that already have the rename sem can end up causing writepage to get 924 * called and if the server was down that means we end up here, and we 925 * can never tell if the caller already has the rename_sem. 926 */ 927 page = alloc_dentry_path(); 928 full_path = build_path_from_dentry(cfile->dentry, page); 929 if (IS_ERR(full_path)) { 930 mutex_unlock(&cfile->fh_mutex); 931 free_dentry_path(page); 932 free_xid(xid); 933 return PTR_ERR(full_path); 934 } 935 936 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n", 937 inode, cfile->f_flags, full_path); 938 939 if (tcon->ses->server->oplocks) 940 oplock = REQ_OPLOCK; 941 else 942 oplock = 0; 943 944 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 945 if (tcon->unix_ext && cap_unix(tcon->ses) && 946 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 947 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 948 /* 949 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the 950 * original open. Must mask them off for a reopen. 
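 * (Replaying O_CREAT | O_EXCL or O_TRUNC on reconnect could fail the open or
 * re-truncate data the application already wrote; the reopen only needs to
 * recover the existing handle.)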
951 */ 952 unsigned int oflags = cfile->f_flags & 953 ~(O_CREAT | O_EXCL | O_TRUNC); 954 955 rc = cifs_posix_open(full_path, NULL, inode->i_sb, 956 cifs_sb->ctx->file_mode /* ignored */, 957 oflags, &oplock, &cfile->fid.netfid, xid); 958 if (rc == 0) { 959 cifs_dbg(FYI, "posix reopen succeeded\n"); 960 oparms.reconnect = true; 961 goto reopen_success; 962 } 963 /* 964 * fallthrough to retry open the old way on errors, especially 965 * in the reconnect path it is important to retry hard 966 */ 967 } 968 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 969 970 desired_access = cifs_convert_flags(cfile->f_flags); 971 972 /* O_SYNC also has bit for O_DSYNC so following check picks up either */ 973 if (cfile->f_flags & O_SYNC) 974 create_options |= CREATE_WRITE_THROUGH; 975 976 if (cfile->f_flags & O_DIRECT) 977 create_options |= CREATE_NO_BUFFER; 978 979 if (server->ops->get_lease_key) 980 server->ops->get_lease_key(inode, &cfile->fid); 981 982 oparms = (struct cifs_open_parms) { 983 .tcon = tcon, 984 .cifs_sb = cifs_sb, 985 .desired_access = desired_access, 986 .create_options = cifs_create_options(cifs_sb, create_options), 987 .disposition = disposition, 988 .path = full_path, 989 .fid = &cfile->fid, 990 .reconnect = true, 991 }; 992 993 /* 994 * Can not refresh inode by passing in file_info buf to be returned by 995 * ops->open and then calling get_inode_info with returned buf since 996 * file might have write behind data that needs to be flushed and server 997 * version of file size can be stale. If we knew for sure that inode was 998 * not dirty locally we could do this. 999 */ 1000 rc = server->ops->open(xid, &oparms, &oplock, NULL); 1001 if (rc == -ENOENT && oparms.reconnect == false) { 1002 /* durable handle timeout is expired - open the file again */ 1003 rc = server->ops->open(xid, &oparms, &oplock, NULL); 1004 /* indicate that we need to relock the file */ 1005 oparms.reconnect = true; 1006 } 1007 1008 if (rc) { 1009 mutex_unlock(&cfile->fh_mutex); 1010 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); 1011 cifs_dbg(FYI, "oplock: %d\n", oplock); 1012 goto reopen_error_exit; 1013 } 1014 1015 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1016 reopen_success: 1017 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1018 cfile->invalidHandle = false; 1019 mutex_unlock(&cfile->fh_mutex); 1020 cinode = CIFS_I(inode); 1021 1022 if (can_flush) { 1023 rc = filemap_write_and_wait(inode->i_mapping); 1024 if (!is_interrupt_error(rc)) 1025 mapping_set_error(inode->i_mapping, rc); 1026 1027 if (tcon->posix_extensions) { 1028 rc = smb311_posix_get_inode_info(&inode, full_path, 1029 NULL, inode->i_sb, xid); 1030 } else if (tcon->unix_ext) { 1031 rc = cifs_get_inode_info_unix(&inode, full_path, 1032 inode->i_sb, xid); 1033 } else { 1034 rc = cifs_get_inode_info(&inode, full_path, NULL, 1035 inode->i_sb, xid, NULL); 1036 } 1037 } 1038 /* 1039 * Else we are writing out data to server already and could deadlock if 1040 * we tried to flush data, and since we do not know if we have data that 1041 * would invalidate the current end of file on the server we can not go 1042 * to the server to get the new inode info. 1043 */ 1044 1045 /* 1046 * If the server returned a read oplock and we have mandatory brlocks, 1047 * set oplock level to None. 
1048 */ 1049 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 1050 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 1051 oplock = 0; 1052 } 1053 1054 server->ops->set_fid(cfile, &cfile->fid, oplock); 1055 if (oparms.reconnect) 1056 cifs_relock_file(cfile); 1057 1058 reopen_error_exit: 1059 free_dentry_path(page); 1060 free_xid(xid); 1061 return rc; 1062 } 1063 1064 void smb2_deferred_work_close(struct work_struct *work) 1065 { 1066 struct cifsFileInfo *cfile = container_of(work, 1067 struct cifsFileInfo, deferred.work); 1068 1069 spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1070 cifs_del_deferred_close(cfile); 1071 cfile->deferred_close_scheduled = false; 1072 spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1073 _cifsFileInfo_put(cfile, true, false); 1074 } 1075 1076 int cifs_close(struct inode *inode, struct file *file) 1077 { 1078 struct cifsFileInfo *cfile; 1079 struct cifsInodeInfo *cinode = CIFS_I(inode); 1080 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1081 struct cifs_deferred_close *dclose; 1082 1083 cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE); 1084 1085 if (file->private_data != NULL) { 1086 cfile = file->private_data; 1087 file->private_data = NULL; 1088 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); 1089 if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG) 1090 && cinode->lease_granted && 1091 !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && 1092 dclose && !(cfile->status_file_deleted)) { 1093 if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { 1094 inode_set_mtime_to_ts(inode, 1095 inode_set_ctime_current(inode)); 1096 } 1097 spin_lock(&cinode->deferred_lock); 1098 cifs_add_deferred_close(cfile, dclose); 1099 if (cfile->deferred_close_scheduled && 1100 delayed_work_pending(&cfile->deferred)) { 1101 /* 1102 * If there is no pending work, mod_delayed_work queues new work. 1103 * So, Increase the ref count to avoid use-after-free. 
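 * (mod_delayed_work() returns false when the work item was idle and had to
 * be queued afresh, so the extra reference is taken only in that case.)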
1104 */ 1105 if (!mod_delayed_work(deferredclose_wq, 1106 &cfile->deferred, cifs_sb->ctx->closetimeo)) 1107 cifsFileInfo_get(cfile); 1108 } else { 1109 /* Deferred close for files */ 1110 queue_delayed_work(deferredclose_wq, 1111 &cfile->deferred, cifs_sb->ctx->closetimeo); 1112 cfile->deferred_close_scheduled = true; 1113 spin_unlock(&cinode->deferred_lock); 1114 return 0; 1115 } 1116 spin_unlock(&cinode->deferred_lock); 1117 _cifsFileInfo_put(cfile, true, false); 1118 } else { 1119 _cifsFileInfo_put(cfile, true, false); 1120 kfree(dclose); 1121 } 1122 } 1123 1124 /* return code from the ->release op is always ignored */ 1125 return 0; 1126 } 1127 1128 void 1129 cifs_reopen_persistent_handles(struct cifs_tcon *tcon) 1130 { 1131 struct cifsFileInfo *open_file, *tmp; 1132 struct list_head tmp_list; 1133 1134 if (!tcon->use_persistent || !tcon->need_reopen_files) 1135 return; 1136 1137 tcon->need_reopen_files = false; 1138 1139 cifs_dbg(FYI, "Reopen persistent handles\n"); 1140 INIT_LIST_HEAD(&tmp_list); 1141 1142 /* list all files open on tree connection, reopen resilient handles */ 1143 spin_lock(&tcon->open_file_lock); 1144 list_for_each_entry(open_file, &tcon->openFileList, tlist) { 1145 if (!open_file->invalidHandle) 1146 continue; 1147 cifsFileInfo_get(open_file); 1148 list_add_tail(&open_file->rlist, &tmp_list); 1149 } 1150 spin_unlock(&tcon->open_file_lock); 1151 1152 list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) { 1153 if (cifs_reopen_file(open_file, false /* do not flush */)) 1154 tcon->need_reopen_files = true; 1155 list_del_init(&open_file->rlist); 1156 cifsFileInfo_put(open_file); 1157 } 1158 } 1159 1160 int cifs_closedir(struct inode *inode, struct file *file) 1161 { 1162 int rc = 0; 1163 unsigned int xid; 1164 struct cifsFileInfo *cfile = file->private_data; 1165 struct cifs_tcon *tcon; 1166 struct TCP_Server_Info *server; 1167 char *buf; 1168 1169 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode); 1170 1171 if (cfile == NULL) 1172 return rc; 1173 1174 xid = get_xid(); 1175 tcon = tlink_tcon(cfile->tlink); 1176 server = tcon->ses->server; 1177 1178 cifs_dbg(FYI, "Freeing private data in close dir\n"); 1179 spin_lock(&cfile->file_info_lock); 1180 if (server->ops->dir_needs_close(cfile)) { 1181 cfile->invalidHandle = true; 1182 spin_unlock(&cfile->file_info_lock); 1183 if (server->ops->close_dir) 1184 rc = server->ops->close_dir(xid, tcon, &cfile->fid); 1185 else 1186 rc = -ENOSYS; 1187 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc); 1188 /* not much we can do if it fails anyway, ignore rc */ 1189 rc = 0; 1190 } else 1191 spin_unlock(&cfile->file_info_lock); 1192 1193 buf = cfile->srch_inf.ntwrk_buf_start; 1194 if (buf) { 1195 cifs_dbg(FYI, "closedir free smb buf in srch struct\n"); 1196 cfile->srch_inf.ntwrk_buf_start = NULL; 1197 if (cfile->srch_inf.smallBuf) 1198 cifs_small_buf_release(buf); 1199 else 1200 cifs_buf_release(buf); 1201 } 1202 1203 cifs_put_tlink(cfile->tlink); 1204 kfree(file->private_data); 1205 file->private_data = NULL; 1206 /* BB can we lock the filestruct while this is going on? 
 */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		      current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->c.flc_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->c.flc_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->c.flc_type = F_RDLCK;
		else
			flock->c.flc_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->c.flc_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
1402 */ 1403 static int 1404 cifs_posix_lock_test(struct file *file, struct file_lock *flock) 1405 { 1406 int rc = 0; 1407 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1408 unsigned char saved_type = flock->c.flc_type; 1409 1410 if ((flock->c.flc_flags & FL_POSIX) == 0) 1411 return 1; 1412 1413 down_read(&cinode->lock_sem); 1414 posix_test_lock(file, flock); 1415 1416 if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) { 1417 flock->c.flc_type = saved_type; 1418 rc = 1; 1419 } 1420 1421 up_read(&cinode->lock_sem); 1422 return rc; 1423 } 1424 1425 /* 1426 * Set the byte-range lock (posix style). Returns: 1427 * 1) <0, if the error occurs while setting the lock; 1428 * 2) 0, if we set the lock and don't need to request to the server; 1429 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock; 1430 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server. 1431 */ 1432 static int 1433 cifs_posix_lock_set(struct file *file, struct file_lock *flock) 1434 { 1435 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1436 int rc = FILE_LOCK_DEFERRED + 1; 1437 1438 if ((flock->c.flc_flags & FL_POSIX) == 0) 1439 return rc; 1440 1441 cifs_down_write(&cinode->lock_sem); 1442 if (!cinode->can_cache_brlcks) { 1443 up_write(&cinode->lock_sem); 1444 return rc; 1445 } 1446 1447 rc = posix_lock_file(file, flock, NULL); 1448 up_write(&cinode->lock_sem); 1449 return rc; 1450 } 1451 1452 int 1453 cifs_push_mandatory_locks(struct cifsFileInfo *cfile) 1454 { 1455 unsigned int xid; 1456 int rc = 0, stored_rc; 1457 struct cifsLockInfo *li, *tmp; 1458 struct cifs_tcon *tcon; 1459 unsigned int num, max_num, max_buf; 1460 LOCKING_ANDX_RANGE *buf, *cur; 1461 static const int types[] = { 1462 LOCKING_ANDX_LARGE_FILES, 1463 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1464 }; 1465 int i; 1466 1467 xid = get_xid(); 1468 tcon = tlink_tcon(cfile->tlink); 1469 1470 /* 1471 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1472 * and check it before using. 
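 * (A reconnect may change maxBuf underneath us, so take a local snapshot
 * once and size the lock buffer from that copy.)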
1473 */ 1474 max_buf = tcon->ses->server->maxBuf; 1475 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { 1476 free_xid(xid); 1477 return -EINVAL; 1478 } 1479 1480 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1481 PAGE_SIZE); 1482 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1483 PAGE_SIZE); 1484 max_num = (max_buf - sizeof(struct smb_hdr)) / 1485 sizeof(LOCKING_ANDX_RANGE); 1486 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1487 if (!buf) { 1488 free_xid(xid); 1489 return -ENOMEM; 1490 } 1491 1492 for (i = 0; i < 2; i++) { 1493 cur = buf; 1494 num = 0; 1495 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1496 if (li->type != types[i]) 1497 continue; 1498 cur->Pid = cpu_to_le16(li->pid); 1499 cur->LengthLow = cpu_to_le32((u32)li->length); 1500 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1501 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1502 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1503 if (++num == max_num) { 1504 stored_rc = cifs_lockv(xid, tcon, 1505 cfile->fid.netfid, 1506 (__u8)li->type, 0, num, 1507 buf); 1508 if (stored_rc) 1509 rc = stored_rc; 1510 cur = buf; 1511 num = 0; 1512 } else 1513 cur++; 1514 } 1515 1516 if (num) { 1517 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1518 (__u8)types[i], 0, num, buf); 1519 if (stored_rc) 1520 rc = stored_rc; 1521 } 1522 } 1523 1524 kfree(buf); 1525 free_xid(xid); 1526 return rc; 1527 } 1528 1529 static __u32 1530 hash_lockowner(fl_owner_t owner) 1531 { 1532 return cifs_lock_secret ^ hash32_ptr((const void *)owner); 1533 } 1534 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1535 1536 struct lock_to_push { 1537 struct list_head llist; 1538 __u64 offset; 1539 __u64 length; 1540 __u32 pid; 1541 __u16 netfid; 1542 __u8 type; 1543 }; 1544 1545 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1546 static int 1547 cifs_push_posix_locks(struct cifsFileInfo *cfile) 1548 { 1549 struct inode *inode = d_inode(cfile->dentry); 1550 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1551 struct file_lock *flock; 1552 struct file_lock_context *flctx = locks_inode_context(inode); 1553 unsigned int count = 0, i; 1554 int rc = 0, xid, type; 1555 struct list_head locks_to_send, *el; 1556 struct lock_to_push *lck, *tmp; 1557 __u64 length; 1558 1559 xid = get_xid(); 1560 1561 if (!flctx) 1562 goto out; 1563 1564 spin_lock(&flctx->flc_lock); 1565 list_for_each(el, &flctx->flc_posix) { 1566 count++; 1567 } 1568 spin_unlock(&flctx->flc_lock); 1569 1570 INIT_LIST_HEAD(&locks_to_send); 1571 1572 /* 1573 * Allocating count locks is enough because no FL_POSIX locks can be 1574 * added to the list while we are holding cinode->lock_sem that 1575 * protects locking operations of this inode. 1576 */ 1577 for (i = 0; i < count; i++) { 1578 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); 1579 if (!lck) { 1580 rc = -ENOMEM; 1581 goto err_out; 1582 } 1583 list_add_tail(&lck->llist, &locks_to_send); 1584 } 1585 1586 el = locks_to_send.next; 1587 spin_lock(&flctx->flc_lock); 1588 for_each_file_lock(flock, &flctx->flc_posix) { 1589 unsigned char ftype = flock->c.flc_type; 1590 1591 if (el == &locks_to_send) { 1592 /* 1593 * The list ended. We don't have enough allocated 1594 * structures - something is really wrong. 
1595 */ 1596 cifs_dbg(VFS, "Can't push all brlocks!\n"); 1597 break; 1598 } 1599 length = cifs_flock_len(flock); 1600 if (ftype == F_RDLCK || ftype == F_SHLCK) 1601 type = CIFS_RDLCK; 1602 else 1603 type = CIFS_WRLCK; 1604 lck = list_entry(el, struct lock_to_push, llist); 1605 lck->pid = hash_lockowner(flock->c.flc_owner); 1606 lck->netfid = cfile->fid.netfid; 1607 lck->length = length; 1608 lck->type = type; 1609 lck->offset = flock->fl_start; 1610 } 1611 spin_unlock(&flctx->flc_lock); 1612 1613 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1614 int stored_rc; 1615 1616 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid, 1617 lck->offset, lck->length, NULL, 1618 lck->type, 0); 1619 if (stored_rc) 1620 rc = stored_rc; 1621 list_del(&lck->llist); 1622 kfree(lck); 1623 } 1624 1625 out: 1626 free_xid(xid); 1627 return rc; 1628 err_out: 1629 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1630 list_del(&lck->llist); 1631 kfree(lck); 1632 } 1633 goto out; 1634 } 1635 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1636 1637 static int 1638 cifs_push_locks(struct cifsFileInfo *cfile) 1639 { 1640 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1641 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1642 int rc = 0; 1643 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1644 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 1645 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1646 1647 /* we are going to update can_cache_brlcks here - need a write access */ 1648 cifs_down_write(&cinode->lock_sem); 1649 if (!cinode->can_cache_brlcks) { 1650 up_write(&cinode->lock_sem); 1651 return rc; 1652 } 1653 1654 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1655 if (cap_unix(tcon->ses) && 1656 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 1657 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 1658 rc = cifs_push_posix_locks(cfile); 1659 else 1660 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1661 rc = tcon->ses->server->ops->push_mand_locks(cfile); 1662 1663 cinode->can_cache_brlcks = false; 1664 up_write(&cinode->lock_sem); 1665 return rc; 1666 } 1667 1668 static void 1669 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, 1670 bool *wait_flag, struct TCP_Server_Info *server) 1671 { 1672 if (flock->c.flc_flags & FL_POSIX) 1673 cifs_dbg(FYI, "Posix\n"); 1674 if (flock->c.flc_flags & FL_FLOCK) 1675 cifs_dbg(FYI, "Flock\n"); 1676 if (flock->c.flc_flags & FL_SLEEP) { 1677 cifs_dbg(FYI, "Blocking lock\n"); 1678 *wait_flag = true; 1679 } 1680 if (flock->c.flc_flags & FL_ACCESS) 1681 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n"); 1682 if (flock->c.flc_flags & FL_LEASE) 1683 cifs_dbg(FYI, "Lease on file - not implemented yet\n"); 1684 if (flock->c.flc_flags & 1685 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | 1686 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK))) 1687 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", 1688 flock->c.flc_flags); 1689 1690 *type = server->vals->large_lock_type; 1691 if (lock_is_write(flock)) { 1692 cifs_dbg(FYI, "F_WRLCK\n"); 1693 *type |= server->vals->exclusive_lock_type; 1694 *lock = 1; 1695 } else if (lock_is_unlock(flock)) { 1696 cifs_dbg(FYI, "F_UNLCK\n"); 1697 *type |= server->vals->unlock_lock_type; 1698 *unlock = 1; 1699 /* Check if unlock includes more than one lock range */ 1700 } else if (lock_is_read(flock)) { 1701 cifs_dbg(FYI, "F_RDLCK\n"); 1702 *type |= server->vals->shared_lock_type; 1703 *lock = 1; 1704 } else if (flock->c.flc_type == F_EXLCK) { 1705 
cifs_dbg(FYI, "F_EXLCK\n"); 1706 *type |= server->vals->exclusive_lock_type; 1707 *lock = 1; 1708 } else if (flock->c.flc_type == F_SHLCK) { 1709 cifs_dbg(FYI, "F_SHLCK\n"); 1710 *type |= server->vals->shared_lock_type; 1711 *lock = 1; 1712 } else 1713 cifs_dbg(FYI, "Unknown type of lock\n"); 1714 } 1715 1716 static int 1717 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, 1718 bool wait_flag, bool posix_lck, unsigned int xid) 1719 { 1720 int rc = 0; 1721 __u64 length = cifs_flock_len(flock); 1722 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1723 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1724 struct TCP_Server_Info *server = tcon->ses->server; 1725 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1726 __u16 netfid = cfile->fid.netfid; 1727 1728 if (posix_lck) { 1729 int posix_lock_type; 1730 1731 rc = cifs_posix_lock_test(file, flock); 1732 if (!rc) 1733 return rc; 1734 1735 if (type & server->vals->shared_lock_type) 1736 posix_lock_type = CIFS_RDLCK; 1737 else 1738 posix_lock_type = CIFS_WRLCK; 1739 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1740 hash_lockowner(flock->c.flc_owner), 1741 flock->fl_start, length, flock, 1742 posix_lock_type, wait_flag); 1743 return rc; 1744 } 1745 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1746 1747 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock); 1748 if (!rc) 1749 return rc; 1750 1751 /* BB we could chain these into one lock request BB */ 1752 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, 1753 1, 0, false); 1754 if (rc == 0) { 1755 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1756 type, 0, 1, false); 1757 flock->c.flc_type = F_UNLCK; 1758 if (rc != 0) 1759 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1760 rc); 1761 return 0; 1762 } 1763 1764 if (type & server->vals->shared_lock_type) { 1765 flock->c.flc_type = F_WRLCK; 1766 return 0; 1767 } 1768 1769 type &= ~server->vals->exclusive_lock_type; 1770 1771 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1772 type | server->vals->shared_lock_type, 1773 1, 0, false); 1774 if (rc == 0) { 1775 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1776 type | server->vals->shared_lock_type, 0, 1, false); 1777 flock->c.flc_type = F_RDLCK; 1778 if (rc != 0) 1779 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1780 rc); 1781 } else 1782 flock->c.flc_type = F_WRLCK; 1783 1784 return 0; 1785 } 1786 1787 void 1788 cifs_move_llist(struct list_head *source, struct list_head *dest) 1789 { 1790 struct list_head *li, *tmp; 1791 list_for_each_safe(li, tmp, source) 1792 list_move(li, dest); 1793 } 1794 1795 void 1796 cifs_free_llist(struct list_head *llist) 1797 { 1798 struct cifsLockInfo *li, *tmp; 1799 list_for_each_entry_safe(li, tmp, llist, llist) { 1800 cifs_del_lock_waiters(li); 1801 list_del(&li->llist); 1802 kfree(li); 1803 } 1804 } 1805 1806 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1807 int 1808 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, 1809 unsigned int xid) 1810 { 1811 int rc = 0, stored_rc; 1812 static const int types[] = { 1813 LOCKING_ANDX_LARGE_FILES, 1814 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1815 }; 1816 unsigned int i; 1817 unsigned int max_num, num, max_buf; 1818 LOCKING_ANDX_RANGE *buf, *cur; 1819 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1820 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1821 struct cifsLockInfo *li, *tmp; 1822 
__u64 length = cifs_flock_len(flock); 1823 struct list_head tmp_llist; 1824 1825 INIT_LIST_HEAD(&tmp_llist); 1826 1827 /* 1828 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1829 * and check it before using. 1830 */ 1831 max_buf = tcon->ses->server->maxBuf; 1832 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) 1833 return -EINVAL; 1834 1835 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1836 PAGE_SIZE); 1837 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1838 PAGE_SIZE); 1839 max_num = (max_buf - sizeof(struct smb_hdr)) / 1840 sizeof(LOCKING_ANDX_RANGE); 1841 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1842 if (!buf) 1843 return -ENOMEM; 1844 1845 cifs_down_write(&cinode->lock_sem); 1846 for (i = 0; i < 2; i++) { 1847 cur = buf; 1848 num = 0; 1849 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1850 if (flock->fl_start > li->offset || 1851 (flock->fl_start + length) < 1852 (li->offset + li->length)) 1853 continue; 1854 if (current->tgid != li->pid) 1855 continue; 1856 if (types[i] != li->type) 1857 continue; 1858 if (cinode->can_cache_brlcks) { 1859 /* 1860 * We can cache brlock requests - simply remove 1861 * a lock from the file's list. 1862 */ 1863 list_del(&li->llist); 1864 cifs_del_lock_waiters(li); 1865 kfree(li); 1866 continue; 1867 } 1868 cur->Pid = cpu_to_le16(li->pid); 1869 cur->LengthLow = cpu_to_le32((u32)li->length); 1870 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1871 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1872 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1873 /* 1874 * We need to save a lock here to let us add it again to 1875 * the file's list if the unlock range request fails on 1876 * the server. 1877 */ 1878 list_move(&li->llist, &tmp_llist); 1879 if (++num == max_num) { 1880 stored_rc = cifs_lockv(xid, tcon, 1881 cfile->fid.netfid, 1882 li->type, num, 0, buf); 1883 if (stored_rc) { 1884 /* 1885 * We failed on the unlock range 1886 * request - add all locks from the tmp 1887 * list to the head of the file's list. 1888 */ 1889 cifs_move_llist(&tmp_llist, 1890 &cfile->llist->locks); 1891 rc = stored_rc; 1892 } else 1893 /* 1894 * The unlock range request succeed - 1895 * free the tmp list. 
1896 */ 1897 cifs_free_llist(&tmp_llist); 1898 cur = buf; 1899 num = 0; 1900 } else 1901 cur++; 1902 } 1903 if (num) { 1904 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1905 types[i], num, 0, buf); 1906 if (stored_rc) { 1907 cifs_move_llist(&tmp_llist, 1908 &cfile->llist->locks); 1909 rc = stored_rc; 1910 } else 1911 cifs_free_llist(&tmp_llist); 1912 } 1913 } 1914 1915 up_write(&cinode->lock_sem); 1916 kfree(buf); 1917 return rc; 1918 } 1919 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1920 1921 static int 1922 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, 1923 bool wait_flag, bool posix_lck, int lock, int unlock, 1924 unsigned int xid) 1925 { 1926 int rc = 0; 1927 __u64 length = cifs_flock_len(flock); 1928 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1929 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1930 struct TCP_Server_Info *server = tcon->ses->server; 1931 struct inode *inode = d_inode(cfile->dentry); 1932 1933 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1934 if (posix_lck) { 1935 int posix_lock_type; 1936 1937 rc = cifs_posix_lock_set(file, flock); 1938 if (rc <= FILE_LOCK_DEFERRED) 1939 return rc; 1940 1941 if (type & server->vals->shared_lock_type) 1942 posix_lock_type = CIFS_RDLCK; 1943 else 1944 posix_lock_type = CIFS_WRLCK; 1945 1946 if (unlock == 1) 1947 posix_lock_type = CIFS_UNLCK; 1948 1949 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, 1950 hash_lockowner(flock->c.flc_owner), 1951 flock->fl_start, length, 1952 NULL, posix_lock_type, wait_flag); 1953 goto out; 1954 } 1955 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1956 if (lock) { 1957 struct cifsLockInfo *lock; 1958 1959 lock = cifs_lock_init(flock->fl_start, length, type, 1960 flock->c.flc_flags); 1961 if (!lock) 1962 return -ENOMEM; 1963 1964 rc = cifs_lock_add_if(cfile, lock, wait_flag); 1965 if (rc < 0) { 1966 kfree(lock); 1967 return rc; 1968 } 1969 if (!rc) 1970 goto out; 1971 1972 /* 1973 * Windows 7 server can delay breaking lease from read to None 1974 * if we set a byte-range lock on a file - break it explicitly 1975 * before sending the lock to the server to be sure the next 1976 * read won't conflict with non-overlapted locks due to 1977 * pagereading. 
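	 * (Cached reads are done in whole-page units, so a page read may touch
	 * bytes covered by another process's mandatory lock even though the
	 * caller's own range does not overlap it.)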
1978 */ 1979 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && 1980 CIFS_CACHE_READ(CIFS_I(inode))) { 1981 cifs_zap_mapping(inode); 1982 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", 1983 inode); 1984 CIFS_I(inode)->oplock = 0; 1985 } 1986 1987 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1988 type, 1, 0, wait_flag); 1989 if (rc) { 1990 kfree(lock); 1991 return rc; 1992 } 1993 1994 cifs_lock_add(cfile, lock); 1995 } else if (unlock) 1996 rc = server->ops->mand_unlock_range(cfile, flock, xid); 1997 1998 out: 1999 if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) { 2000 /* 2001 * If this is a request to remove all locks because we 2002 * are closing the file, it doesn't matter if the 2003 * unlocking failed as both cifs.ko and the SMB server 2004 * remove the lock on file close 2005 */ 2006 if (rc) { 2007 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); 2008 if (!(flock->c.flc_flags & FL_CLOSE)) 2009 return rc; 2010 } 2011 rc = locks_lock_file_wait(file, flock); 2012 } 2013 return rc; 2014 } 2015 2016 int cifs_flock(struct file *file, int cmd, struct file_lock *fl) 2017 { 2018 int rc, xid; 2019 int lock = 0, unlock = 0; 2020 bool wait_flag = false; 2021 bool posix_lck = false; 2022 struct cifs_sb_info *cifs_sb; 2023 struct cifs_tcon *tcon; 2024 struct cifsFileInfo *cfile; 2025 __u32 type; 2026 2027 xid = get_xid(); 2028 2029 if (!(fl->c.flc_flags & FL_FLOCK)) { 2030 rc = -ENOLCK; 2031 free_xid(xid); 2032 return rc; 2033 } 2034 2035 cfile = (struct cifsFileInfo *)file->private_data; 2036 tcon = tlink_tcon(cfile->tlink); 2037 2038 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, 2039 tcon->ses->server); 2040 cifs_sb = CIFS_FILE_SB(file); 2041 2042 if (cap_unix(tcon->ses) && 2043 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2044 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2045 posix_lck = true; 2046 2047 if (!lock && !unlock) { 2048 /* 2049 * if no lock or unlock then nothing to do since we do not 2050 * know what it is 2051 */ 2052 rc = -EOPNOTSUPP; 2053 free_xid(xid); 2054 return rc; 2055 } 2056 2057 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, 2058 xid); 2059 free_xid(xid); 2060 return rc; 2061 2062 2063 } 2064 2065 int cifs_lock(struct file *file, int cmd, struct file_lock *flock) 2066 { 2067 int rc, xid; 2068 int lock = 0, unlock = 0; 2069 bool wait_flag = false; 2070 bool posix_lck = false; 2071 struct cifs_sb_info *cifs_sb; 2072 struct cifs_tcon *tcon; 2073 struct cifsFileInfo *cfile; 2074 __u32 type; 2075 2076 rc = -EACCES; 2077 xid = get_xid(); 2078 2079 cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd, 2080 flock->c.flc_flags, flock->c.flc_type, 2081 (long long)flock->fl_start, 2082 (long long)flock->fl_end); 2083 2084 cfile = (struct cifsFileInfo *)file->private_data; 2085 tcon = tlink_tcon(cfile->tlink); 2086 2087 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, 2088 tcon->ses->server); 2089 cifs_sb = CIFS_FILE_SB(file); 2090 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); 2091 2092 if (cap_unix(tcon->ses) && 2093 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2094 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2095 posix_lck = true; 2096 /* 2097 * BB add code here to normalize offset and length to account for 2098 * negative length which we can not accept over the wire. 
2099 */ 2100 if (IS_GETLK(cmd)) { 2101 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); 2102 free_xid(xid); 2103 return rc; 2104 } 2105 2106 if (!lock && !unlock) { 2107 /* 2108 * if no lock or unlock then nothing to do since we do not 2109 * know what it is 2110 */ 2111 free_xid(xid); 2112 return -EOPNOTSUPP; 2113 } 2114 2115 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, 2116 xid); 2117 free_xid(xid); 2118 return rc; 2119 } 2120 2121 /* 2122 * update the file size (if needed) after a write. Should be called with 2123 * the inode->i_lock held 2124 */ 2125 void 2126 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 2127 unsigned int bytes_written) 2128 { 2129 loff_t end_of_write = offset + bytes_written; 2130 2131 if (end_of_write > cifsi->netfs.remote_i_size) 2132 netfs_resize_file(&cifsi->netfs, end_of_write, true); 2133 } 2134 2135 static ssize_t 2136 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, 2137 size_t write_size, loff_t *offset) 2138 { 2139 int rc = 0; 2140 unsigned int bytes_written = 0; 2141 unsigned int total_written; 2142 struct cifs_tcon *tcon; 2143 struct TCP_Server_Info *server; 2144 unsigned int xid; 2145 struct dentry *dentry = open_file->dentry; 2146 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); 2147 struct cifs_io_parms io_parms = {0}; 2148 2149 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", 2150 write_size, *offset, dentry); 2151 2152 tcon = tlink_tcon(open_file->tlink); 2153 server = tcon->ses->server; 2154 2155 if (!server->ops->sync_write) 2156 return -ENOSYS; 2157 2158 xid = get_xid(); 2159 2160 for (total_written = 0; write_size > total_written; 2161 total_written += bytes_written) { 2162 rc = -EAGAIN; 2163 while (rc == -EAGAIN) { 2164 struct kvec iov[2]; 2165 unsigned int len; 2166 2167 if (open_file->invalidHandle) { 2168 /* we could deadlock if we called 2169 filemap_fdatawait from here so tell 2170 reopen_file not to flush data to 2171 server now */ 2172 rc = cifs_reopen_file(open_file, false); 2173 if (rc != 0) 2174 break; 2175 } 2176 2177 len = min(server->ops->wp_retry_size(d_inode(dentry)), 2178 (unsigned int)write_size - total_written); 2179 /* iov[0] is reserved for smb header */ 2180 iov[1].iov_base = (char *)write_data + total_written; 2181 iov[1].iov_len = len; 2182 io_parms.pid = pid; 2183 io_parms.tcon = tcon; 2184 io_parms.offset = *offset; 2185 io_parms.length = len; 2186 rc = server->ops->sync_write(xid, &open_file->fid, 2187 &io_parms, &bytes_written, iov, 1); 2188 } 2189 if (rc || (bytes_written == 0)) { 2190 if (total_written) 2191 break; 2192 else { 2193 free_xid(xid); 2194 return rc; 2195 } 2196 } else { 2197 spin_lock(&d_inode(dentry)->i_lock); 2198 cifs_update_eof(cifsi, *offset, bytes_written); 2199 spin_unlock(&d_inode(dentry)->i_lock); 2200 *offset += bytes_written; 2201 } 2202 } 2203 2204 cifs_stats_bytes_written(tcon, total_written); 2205 2206 if (total_written > 0) { 2207 spin_lock(&d_inode(dentry)->i_lock); 2208 if (*offset > d_inode(dentry)->i_size) { 2209 i_size_write(d_inode(dentry), *offset); 2210 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9; 2211 } 2212 spin_unlock(&d_inode(dentry)->i_lock); 2213 } 2214 mark_inode_dirty_sync(d_inode(dentry)); 2215 free_xid(xid); 2216 return total_written; 2217 } 2218 2219 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, 2220 bool fsuid_only) 2221 { 2222 struct cifsFileInfo *open_file = NULL; 2223 struct cifs_sb_info *cifs_sb = 
CIFS_SB(cifs_inode->netfs.inode.i_sb); 2224 2225 /* only filter by fsuid on multiuser mounts */ 2226 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2227 fsuid_only = false; 2228 2229 spin_lock(&cifs_inode->open_file_lock); 2230 /* we could simply get the first_list_entry since write-only entries 2231 are always at the end of the list but since the first entry might 2232 have a close pending, we go through the whole list */ 2233 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2234 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2235 continue; 2236 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 2237 if ((!open_file->invalidHandle)) { 2238 /* found a good file */ 2239 /* lock it so it will not be closed on us */ 2240 cifsFileInfo_get(open_file); 2241 spin_unlock(&cifs_inode->open_file_lock); 2242 return open_file; 2243 } /* else might as well continue, and look for 2244 another, or simply have the caller reopen it 2245 again rather than trying to fix this handle */ 2246 } else /* write only file */ 2247 break; /* write only files are last so must be done */ 2248 } 2249 spin_unlock(&cifs_inode->open_file_lock); 2250 return NULL; 2251 } 2252 2253 /* Return -EBADF if no handle is found and general rc otherwise */ 2254 int 2255 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, 2256 struct cifsFileInfo **ret_file) 2257 { 2258 struct cifsFileInfo *open_file, *inv_file = NULL; 2259 struct cifs_sb_info *cifs_sb; 2260 bool any_available = false; 2261 int rc = -EBADF; 2262 unsigned int refind = 0; 2263 bool fsuid_only = flags & FIND_WR_FSUID_ONLY; 2264 bool with_delete = flags & FIND_WR_WITH_DELETE; 2265 *ret_file = NULL; 2266 2267 /* 2268 * Having a null inode here (because mapping->host was set to zero by 2269 * the VFS or MM) should not happen but we had reports of on oops (due 2270 * to it being zero) during stress testcases so we need to check for it 2271 */ 2272 2273 if (cifs_inode == NULL) { 2274 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n"); 2275 dump_stack(); 2276 return rc; 2277 } 2278 2279 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2280 2281 /* only filter by fsuid on multiuser mounts */ 2282 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2283 fsuid_only = false; 2284 2285 spin_lock(&cifs_inode->open_file_lock); 2286 refind_writable: 2287 if (refind > MAX_REOPEN_ATT) { 2288 spin_unlock(&cifs_inode->open_file_lock); 2289 return rc; 2290 } 2291 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2292 if (!any_available && open_file->pid != current->tgid) 2293 continue; 2294 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2295 continue; 2296 if (with_delete && !(open_file->fid.access & DELETE)) 2297 continue; 2298 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 2299 if (!open_file->invalidHandle) { 2300 /* found a good writable file */ 2301 cifsFileInfo_get(open_file); 2302 spin_unlock(&cifs_inode->open_file_lock); 2303 *ret_file = open_file; 2304 return 0; 2305 } else { 2306 if (!inv_file) 2307 inv_file = open_file; 2308 } 2309 } 2310 } 2311 /* couldn't find useable FH with same pid, try any available */ 2312 if (!any_available) { 2313 any_available = true; 2314 goto refind_writable; 2315 } 2316 2317 if (inv_file) { 2318 any_available = false; 2319 cifsFileInfo_get(inv_file); 2320 } 2321 2322 spin_unlock(&cifs_inode->open_file_lock); 2323 2324 if (inv_file) { 2325 rc = cifs_reopen_file(inv_file, false); 2326 if (!rc) { 2327 *ret_file = inv_file; 2328 return 0; 2329 } 2330 
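	/*
	 * Reopen of the invalidated handle failed: move it to the tail of the
	 * list so other handles are tried first, drop our reference, bump the
	 * retry count and search again under the lock.
	 */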
2331 spin_lock(&cifs_inode->open_file_lock); 2332 list_move_tail(&inv_file->flist, &cifs_inode->openFileList); 2333 spin_unlock(&cifs_inode->open_file_lock); 2334 cifsFileInfo_put(inv_file); 2335 ++refind; 2336 inv_file = NULL; 2337 spin_lock(&cifs_inode->open_file_lock); 2338 goto refind_writable; 2339 } 2340 2341 return rc; 2342 } 2343 2344 struct cifsFileInfo * 2345 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) 2346 { 2347 struct cifsFileInfo *cfile; 2348 int rc; 2349 2350 rc = cifs_get_writable_file(cifs_inode, flags, &cfile); 2351 if (rc) 2352 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc); 2353 2354 return cfile; 2355 } 2356 2357 int 2358 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, 2359 int flags, 2360 struct cifsFileInfo **ret_file) 2361 { 2362 struct cifsFileInfo *cfile; 2363 void *page = alloc_dentry_path(); 2364 2365 *ret_file = NULL; 2366 2367 spin_lock(&tcon->open_file_lock); 2368 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2369 struct cifsInodeInfo *cinode; 2370 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2371 if (IS_ERR(full_path)) { 2372 spin_unlock(&tcon->open_file_lock); 2373 free_dentry_path(page); 2374 return PTR_ERR(full_path); 2375 } 2376 if (strcmp(full_path, name)) 2377 continue; 2378 2379 cinode = CIFS_I(d_inode(cfile->dentry)); 2380 spin_unlock(&tcon->open_file_lock); 2381 free_dentry_path(page); 2382 return cifs_get_writable_file(cinode, flags, ret_file); 2383 } 2384 2385 spin_unlock(&tcon->open_file_lock); 2386 free_dentry_path(page); 2387 return -ENOENT; 2388 } 2389 2390 int 2391 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, 2392 struct cifsFileInfo **ret_file) 2393 { 2394 struct cifsFileInfo *cfile; 2395 void *page = alloc_dentry_path(); 2396 2397 *ret_file = NULL; 2398 2399 spin_lock(&tcon->open_file_lock); 2400 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2401 struct cifsInodeInfo *cinode; 2402 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2403 if (IS_ERR(full_path)) { 2404 spin_unlock(&tcon->open_file_lock); 2405 free_dentry_path(page); 2406 return PTR_ERR(full_path); 2407 } 2408 if (strcmp(full_path, name)) 2409 continue; 2410 2411 cinode = CIFS_I(d_inode(cfile->dentry)); 2412 spin_unlock(&tcon->open_file_lock); 2413 free_dentry_path(page); 2414 *ret_file = find_readable_file(cinode, 0); 2415 return *ret_file ? 0 : -ENOENT; 2416 } 2417 2418 spin_unlock(&tcon->open_file_lock); 2419 free_dentry_path(page); 2420 return -ENOENT; 2421 } 2422 2423 void 2424 cifs_writedata_release(struct kref *refcount) 2425 { 2426 struct cifs_writedata *wdata = container_of(refcount, 2427 struct cifs_writedata, refcount); 2428 #ifdef CONFIG_CIFS_SMB_DIRECT 2429 if (wdata->mr) { 2430 smbd_deregister_mr(wdata->mr); 2431 wdata->mr = NULL; 2432 } 2433 #endif 2434 2435 if (wdata->cfile) 2436 cifsFileInfo_put(wdata->cfile); 2437 2438 kfree(wdata); 2439 } 2440 2441 /* 2442 * Write failed with a retryable error. Resend the write request. It's also 2443 * possible that the page was redirtied so re-clean the page. 
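 * The iterator is resent in chunks of at most wp_retry_size() bytes; each
 * chunk gets its own wdata2 that is handed to ->async_writev().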
2444 */ 2445 static void 2446 cifs_writev_requeue(struct cifs_writedata *wdata) 2447 { 2448 int rc = 0; 2449 struct inode *inode = d_inode(wdata->cfile->dentry); 2450 struct TCP_Server_Info *server; 2451 unsigned int rest_len = wdata->bytes; 2452 loff_t fpos = wdata->offset; 2453 2454 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2455 do { 2456 struct cifs_writedata *wdata2; 2457 unsigned int wsize, cur_len; 2458 2459 wsize = server->ops->wp_retry_size(inode); 2460 if (wsize < rest_len) { 2461 if (wsize < PAGE_SIZE) { 2462 rc = -EOPNOTSUPP; 2463 break; 2464 } 2465 cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); 2466 } else { 2467 cur_len = rest_len; 2468 } 2469 2470 wdata2 = cifs_writedata_alloc(cifs_writev_complete); 2471 if (!wdata2) { 2472 rc = -ENOMEM; 2473 break; 2474 } 2475 2476 wdata2->sync_mode = wdata->sync_mode; 2477 wdata2->offset = fpos; 2478 wdata2->bytes = cur_len; 2479 wdata2->iter = wdata->iter; 2480 2481 iov_iter_advance(&wdata2->iter, fpos - wdata->offset); 2482 iov_iter_truncate(&wdata2->iter, wdata2->bytes); 2483 2484 if (iov_iter_is_xarray(&wdata2->iter)) 2485 /* Check for pages having been redirtied and clean 2486 * them. We can do this by walking the xarray. If 2487 * it's not an xarray, then it's a DIO and we shouldn't 2488 * be mucking around with the page bits. 2489 */ 2490 cifs_undirty_folios(inode, fpos, cur_len); 2491 2492 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, 2493 &wdata2->cfile); 2494 if (!wdata2->cfile) { 2495 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", 2496 rc); 2497 if (!is_retryable_error(rc)) 2498 rc = -EBADF; 2499 } else { 2500 wdata2->pid = wdata2->cfile->pid; 2501 rc = server->ops->async_writev(wdata2, 2502 cifs_writedata_release); 2503 } 2504 2505 kref_put(&wdata2->refcount, cifs_writedata_release); 2506 if (rc) { 2507 if (is_retryable_error(rc)) 2508 continue; 2509 fpos += cur_len; 2510 rest_len -= cur_len; 2511 break; 2512 } 2513 2514 fpos += cur_len; 2515 rest_len -= cur_len; 2516 } while (rest_len > 0); 2517 2518 /* Clean up remaining pages from the original wdata */ 2519 if (iov_iter_is_xarray(&wdata->iter)) 2520 cifs_pages_write_failed(inode, fpos, rest_len); 2521 2522 if (rc != 0 && !is_retryable_error(rc)) 2523 mapping_set_error(inode->i_mapping, rc); 2524 kref_put(&wdata->refcount, cifs_writedata_release); 2525 } 2526 2527 void 2528 cifs_writev_complete(struct work_struct *work) 2529 { 2530 struct cifs_writedata *wdata = container_of(work, 2531 struct cifs_writedata, work); 2532 struct inode *inode = d_inode(wdata->cfile->dentry); 2533 2534 if (wdata->result == 0) { 2535 spin_lock(&inode->i_lock); 2536 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2537 spin_unlock(&inode->i_lock); 2538 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2539 wdata->bytes); 2540 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2541 return cifs_writev_requeue(wdata); 2542 2543 if (wdata->result == -EAGAIN) 2544 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); 2545 else if (wdata->result < 0) 2546 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); 2547 else 2548 cifs_pages_written_back(inode, wdata->offset, wdata->bytes); 2549 2550 if (wdata->result != -EAGAIN) 2551 mapping_set_error(inode->i_mapping, wdata->result); 2552 kref_put(&wdata->refcount, cifs_writedata_release); 2553 } 2554 2555 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) 2556 { 2557 struct cifs_writedata *wdata; 2558 2559 wdata = kzalloc(sizeof(*wdata), 
GFP_NOFS); 2560 if (wdata != NULL) { 2561 kref_init(&wdata->refcount); 2562 INIT_LIST_HEAD(&wdata->list); 2563 init_completion(&wdata->done); 2564 INIT_WORK(&wdata->work, complete); 2565 } 2566 return wdata; 2567 } 2568 2569 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) 2570 { 2571 struct address_space *mapping = page->mapping; 2572 loff_t offset = (loff_t)page->index << PAGE_SHIFT; 2573 char *write_data; 2574 int rc = -EFAULT; 2575 int bytes_written = 0; 2576 struct inode *inode; 2577 struct cifsFileInfo *open_file; 2578 2579 if (!mapping || !mapping->host) 2580 return -EFAULT; 2581 2582 inode = page->mapping->host; 2583 2584 offset += (loff_t)from; 2585 write_data = kmap(page); 2586 write_data += from; 2587 2588 if ((to > PAGE_SIZE) || (from > to)) { 2589 kunmap(page); 2590 return -EIO; 2591 } 2592 2593 /* racing with truncate? */ 2594 if (offset > mapping->host->i_size) { 2595 kunmap(page); 2596 return 0; /* don't care */ 2597 } 2598 2599 /* check to make sure that we are not extending the file */ 2600 if (mapping->host->i_size - offset < (loff_t)to) 2601 to = (unsigned)(mapping->host->i_size - offset); 2602 2603 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, 2604 &open_file); 2605 if (!rc) { 2606 bytes_written = cifs_write(open_file, open_file->pid, 2607 write_data, to - from, &offset); 2608 cifsFileInfo_put(open_file); 2609 /* Does mm or vfs already set times? */ 2610 simple_inode_init_ts(inode); 2611 if ((bytes_written > 0) && (offset)) 2612 rc = 0; 2613 else if (bytes_written < 0) 2614 rc = bytes_written; 2615 else 2616 rc = -EFAULT; 2617 } else { 2618 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); 2619 if (!is_retryable_error(rc)) 2620 rc = -EIO; 2621 } 2622 2623 kunmap(page); 2624 return rc; 2625 } 2626 2627 /* 2628 * Extend the region to be written back to include subsequent contiguously 2629 * dirty pages if possible, but don't sleep while doing so. 2630 */ 2631 static void cifs_extend_writeback(struct address_space *mapping, 2632 struct xa_state *xas, 2633 long *_count, 2634 loff_t start, 2635 int max_pages, 2636 loff_t max_len, 2637 size_t *_len) 2638 { 2639 struct folio_batch batch; 2640 struct folio *folio; 2641 unsigned int nr_pages; 2642 pgoff_t index = (start + *_len) / PAGE_SIZE; 2643 size_t len; 2644 bool stop = true; 2645 unsigned int i; 2646 2647 folio_batch_init(&batch); 2648 2649 do { 2650 /* Firstly, we gather up a batch of contiguous dirty pages 2651 * under the RCU read lock - but we can't clear the dirty flags 2652 * there if any of those pages are mapped. 2653 */ 2654 rcu_read_lock(); 2655 2656 xas_for_each(xas, folio, ULONG_MAX) { 2657 stop = true; 2658 if (xas_retry(xas, folio)) 2659 continue; 2660 if (xa_is_value(folio)) 2661 break; 2662 if (folio->index != index) { 2663 xas_reset(xas); 2664 break; 2665 } 2666 2667 if (!folio_try_get_rcu(folio)) { 2668 xas_reset(xas); 2669 continue; 2670 } 2671 nr_pages = folio_nr_pages(folio); 2672 if (nr_pages > max_pages) { 2673 xas_reset(xas); 2674 break; 2675 } 2676 2677 /* Has the page moved or been split? 
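			 * xas_reload() revalidates the entry now that we hold
			 * a reference; if it changed, drop the folio, reset
			 * the walk and stop extending.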
*/ 2678 if (unlikely(folio != xas_reload(xas))) { 2679 folio_put(folio); 2680 xas_reset(xas); 2681 break; 2682 } 2683 2684 if (!folio_trylock(folio)) { 2685 folio_put(folio); 2686 xas_reset(xas); 2687 break; 2688 } 2689 if (!folio_test_dirty(folio) || 2690 folio_test_writeback(folio)) { 2691 folio_unlock(folio); 2692 folio_put(folio); 2693 xas_reset(xas); 2694 break; 2695 } 2696 2697 max_pages -= nr_pages; 2698 len = folio_size(folio); 2699 stop = false; 2700 2701 index += nr_pages; 2702 *_count -= nr_pages; 2703 *_len += len; 2704 if (max_pages <= 0 || *_len >= max_len || *_count <= 0) 2705 stop = true; 2706 2707 if (!folio_batch_add(&batch, folio)) 2708 break; 2709 if (stop) 2710 break; 2711 } 2712 2713 xas_pause(xas); 2714 rcu_read_unlock(); 2715 2716 /* Now, if we obtained any pages, we can shift them to being 2717 * writable and mark them for caching. 2718 */ 2719 if (!folio_batch_count(&batch)) 2720 break; 2721 2722 for (i = 0; i < folio_batch_count(&batch); i++) { 2723 folio = batch.folios[i]; 2724 /* The folio should be locked, dirty and not undergoing 2725 * writeback from the loop above. 2726 */ 2727 if (!folio_clear_dirty_for_io(folio)) 2728 WARN_ON(1); 2729 folio_start_writeback(folio); 2730 folio_unlock(folio); 2731 } 2732 2733 folio_batch_release(&batch); 2734 cond_resched(); 2735 } while (!stop); 2736 } 2737 2738 /* 2739 * Write back the locked page and any subsequent non-locked dirty pages. 2740 */ 2741 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, 2742 struct writeback_control *wbc, 2743 struct xa_state *xas, 2744 struct folio *folio, 2745 unsigned long long start, 2746 unsigned long long end) 2747 { 2748 struct inode *inode = mapping->host; 2749 struct TCP_Server_Info *server; 2750 struct cifs_writedata *wdata; 2751 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2752 struct cifs_credits credits_on_stack; 2753 struct cifs_credits *credits = &credits_on_stack; 2754 struct cifsFileInfo *cfile = NULL; 2755 unsigned long long i_size = i_size_read(inode), max_len; 2756 unsigned int xid, wsize; 2757 size_t len = folio_size(folio); 2758 long count = wbc->nr_to_write; 2759 int rc; 2760 2761 /* The folio should be locked, dirty and not undergoing writeback. */ 2762 if (!folio_clear_dirty_for_io(folio)) 2763 WARN_ON_ONCE(1); 2764 folio_start_writeback(folio); 2765 2766 count -= folio_nr_pages(folio); 2767 2768 xid = get_xid(); 2769 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); 2770 2771 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); 2772 if (rc) { 2773 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); 2774 goto err_xid; 2775 } 2776 2777 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2778 &wsize, credits); 2779 if (rc != 0) 2780 goto err_close; 2781 2782 wdata = cifs_writedata_alloc(cifs_writev_complete); 2783 if (!wdata) { 2784 rc = -ENOMEM; 2785 goto err_uncredit; 2786 } 2787 2788 wdata->sync_mode = wbc->sync_mode; 2789 wdata->offset = folio_pos(folio); 2790 wdata->pid = cfile->pid; 2791 wdata->credits = credits_on_stack; 2792 wdata->cfile = cfile; 2793 wdata->server = server; 2794 cfile = NULL; 2795 2796 /* Find all consecutive lockable dirty pages that have contiguous 2797 * written regions, stopping when we find a page that is not 2798 * immediately lockable, is not dirty or is missing, or we reach the 2799 * end of the range. 2800 */ 2801 if (start < i_size) { 2802 /* Trim the write to the EOF; the extra data is ignored. 
Also 2803 * put an upper limit on the size of a single storedata op. 2804 */ 2805 max_len = wsize; 2806 max_len = min_t(unsigned long long, max_len, end - start + 1); 2807 max_len = min_t(unsigned long long, max_len, i_size - start); 2808 2809 if (len < max_len) { 2810 int max_pages = INT_MAX; 2811 2812 #ifdef CONFIG_CIFS_SMB_DIRECT 2813 if (server->smbd_conn) 2814 max_pages = server->smbd_conn->max_frmr_depth; 2815 #endif 2816 max_pages -= folio_nr_pages(folio); 2817 2818 if (max_pages > 0) 2819 cifs_extend_writeback(mapping, xas, &count, start, 2820 max_pages, max_len, &len); 2821 } 2822 } 2823 len = min_t(unsigned long long, len, i_size - start); 2824 2825 /* We now have a contiguous set of dirty pages, each with writeback 2826 * set; the first page is still locked at this point, but all the rest 2827 * have been unlocked. 2828 */ 2829 folio_unlock(folio); 2830 wdata->bytes = len; 2831 2832 if (start < i_size) { 2833 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, 2834 start, len); 2835 2836 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); 2837 if (rc) 2838 goto err_wdata; 2839 2840 if (wdata->cfile->invalidHandle) 2841 rc = -EAGAIN; 2842 else 2843 rc = wdata->server->ops->async_writev(wdata, 2844 cifs_writedata_release); 2845 if (rc >= 0) { 2846 kref_put(&wdata->refcount, cifs_writedata_release); 2847 goto err_close; 2848 } 2849 } else { 2850 /* The dirty region was entirely beyond the EOF. */ 2851 cifs_pages_written_back(inode, start, len); 2852 rc = 0; 2853 } 2854 2855 err_wdata: 2856 kref_put(&wdata->refcount, cifs_writedata_release); 2857 err_uncredit: 2858 add_credits_and_wake_if(server, credits, 0); 2859 err_close: 2860 if (cfile) 2861 cifsFileInfo_put(cfile); 2862 err_xid: 2863 free_xid(xid); 2864 if (rc == 0) { 2865 wbc->nr_to_write = count; 2866 rc = len; 2867 } else if (is_retryable_error(rc)) { 2868 cifs_pages_write_redirty(inode, start, len); 2869 } else { 2870 cifs_pages_write_failed(inode, start, len); 2871 mapping_set_error(mapping, rc); 2872 } 2873 /* Indication to update ctime and mtime as close is deferred */ 2874 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 2875 return rc; 2876 } 2877 2878 /* 2879 * write a region of pages back to the server 2880 */ 2881 static ssize_t cifs_writepages_begin(struct address_space *mapping, 2882 struct writeback_control *wbc, 2883 struct xa_state *xas, 2884 unsigned long long *_start, 2885 unsigned long long end) 2886 { 2887 struct folio *folio; 2888 unsigned long long start = *_start; 2889 ssize_t ret; 2890 int skips = 0; 2891 2892 search_again: 2893 /* Find the first dirty page. 
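	 * Walk the xarray under RCU for a folio tagged PAGECACHE_TAG_DIRTY,
	 * take a reference on it and revalidate it before dropping the RCU
	 * lock.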
*/ 2894 rcu_read_lock(); 2895 2896 for (;;) { 2897 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 2898 if (xas_retry(xas, folio) || xa_is_value(folio)) 2899 continue; 2900 if (!folio) 2901 break; 2902 2903 if (!folio_try_get_rcu(folio)) { 2904 xas_reset(xas); 2905 continue; 2906 } 2907 2908 if (unlikely(folio != xas_reload(xas))) { 2909 folio_put(folio); 2910 xas_reset(xas); 2911 continue; 2912 } 2913 2914 xas_pause(xas); 2915 break; 2916 } 2917 rcu_read_unlock(); 2918 if (!folio) 2919 return 0; 2920 2921 start = folio_pos(folio); /* May regress with THPs */ 2922 2923 /* At this point we hold neither the i_pages lock nor the page lock: 2924 * the page may be truncated or invalidated (changing page->mapping to 2925 * NULL), or even swizzled back from swapper_space to tmpfs file 2926 * mapping 2927 */ 2928 lock_again: 2929 if (wbc->sync_mode != WB_SYNC_NONE) { 2930 ret = folio_lock_killable(folio); 2931 if (ret < 0) 2932 return ret; 2933 } else { 2934 if (!folio_trylock(folio)) 2935 goto search_again; 2936 } 2937 2938 if (folio->mapping != mapping || 2939 !folio_test_dirty(folio)) { 2940 start += folio_size(folio); 2941 folio_unlock(folio); 2942 goto search_again; 2943 } 2944 2945 if (folio_test_writeback(folio) || 2946 folio_test_fscache(folio)) { 2947 folio_unlock(folio); 2948 if (wbc->sync_mode != WB_SYNC_NONE) { 2949 folio_wait_writeback(folio); 2950 #ifdef CONFIG_CIFS_FSCACHE 2951 folio_wait_fscache(folio); 2952 #endif 2953 goto lock_again; 2954 } 2955 2956 start += folio_size(folio); 2957 if (wbc->sync_mode == WB_SYNC_NONE) { 2958 if (skips >= 5 || need_resched()) { 2959 ret = 0; 2960 goto out; 2961 } 2962 skips++; 2963 } 2964 goto search_again; 2965 } 2966 2967 ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end); 2968 out: 2969 if (ret > 0) 2970 *_start = start + ret; 2971 return ret; 2972 } 2973 2974 /* 2975 * Write a region of pages back to the server 2976 */ 2977 static int cifs_writepages_region(struct address_space *mapping, 2978 struct writeback_control *wbc, 2979 unsigned long long *_start, 2980 unsigned long long end) 2981 { 2982 ssize_t ret; 2983 2984 XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 2985 2986 do { 2987 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end); 2988 if (ret > 0 && wbc->nr_to_write > 0) 2989 cond_resched(); 2990 } while (ret > 0 && wbc->nr_to_write > 0); 2991 2992 return ret > 0 ? 0 : ret; 2993 } 2994 2995 /* 2996 * Write some of the pending data back to the server 2997 */ 2998 static int cifs_writepages(struct address_space *mapping, 2999 struct writeback_control *wbc) 3000 { 3001 loff_t start, end; 3002 int ret; 3003 3004 /* We have to be careful as we can end up racing with setattr() 3005 * truncating the pagecache since the caller doesn't take a lock here 3006 * to prevent it. 
3007 */ 3008 3009 if (wbc->range_cyclic && mapping->writeback_index) { 3010 start = mapping->writeback_index * PAGE_SIZE; 3011 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3012 if (ret < 0) 3013 goto out; 3014 3015 if (wbc->nr_to_write <= 0) { 3016 mapping->writeback_index = start / PAGE_SIZE; 3017 goto out; 3018 } 3019 3020 start = 0; 3021 end = mapping->writeback_index * PAGE_SIZE; 3022 mapping->writeback_index = 0; 3023 ret = cifs_writepages_region(mapping, wbc, &start, end); 3024 if (ret == 0) 3025 mapping->writeback_index = start / PAGE_SIZE; 3026 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 3027 start = 0; 3028 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3029 if (wbc->nr_to_write > 0 && ret == 0) 3030 mapping->writeback_index = start / PAGE_SIZE; 3031 } else { 3032 start = wbc->range_start; 3033 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end); 3034 } 3035 3036 out: 3037 return ret; 3038 } 3039 3040 static int 3041 cifs_writepage_locked(struct page *page, struct writeback_control *wbc) 3042 { 3043 int rc; 3044 unsigned int xid; 3045 3046 xid = get_xid(); 3047 /* BB add check for wbc flags */ 3048 get_page(page); 3049 if (!PageUptodate(page)) 3050 cifs_dbg(FYI, "ppw - page not up to date\n"); 3051 3052 /* 3053 * Set the "writeback" flag, and clear "dirty" in the radix tree. 3054 * 3055 * A writepage() implementation always needs to do either this, 3056 * or re-dirty the page with "redirty_page_for_writepage()" in 3057 * the case of a failure. 3058 * 3059 * Just unlocking the page will cause the radix tree tag-bits 3060 * to fail to update with the state of the page correctly. 3061 */ 3062 set_page_writeback(page); 3063 retry_write: 3064 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 3065 if (is_retryable_error(rc)) { 3066 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 3067 goto retry_write; 3068 redirty_page_for_writepage(wbc, page); 3069 } else if (rc != 0) { 3070 SetPageError(page); 3071 mapping_set_error(page->mapping, rc); 3072 } else { 3073 SetPageUptodate(page); 3074 } 3075 end_page_writeback(page); 3076 put_page(page); 3077 free_xid(xid); 3078 return rc; 3079 } 3080 3081 static int cifs_write_end(struct file *file, struct address_space *mapping, 3082 loff_t pos, unsigned len, unsigned copied, 3083 struct page *page, void *fsdata) 3084 { 3085 int rc; 3086 struct inode *inode = mapping->host; 3087 struct cifsFileInfo *cfile = file->private_data; 3088 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 3089 struct folio *folio = page_folio(page); 3090 __u32 pid; 3091 3092 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3093 pid = cfile->pid; 3094 else 3095 pid = current->tgid; 3096 3097 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", 3098 page, pos, copied); 3099 3100 if (folio_test_checked(folio)) { 3101 if (copied == len) 3102 folio_mark_uptodate(folio); 3103 folio_clear_checked(folio); 3104 } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) 3105 folio_mark_uptodate(folio); 3106 3107 if (!folio_test_uptodate(folio)) { 3108 char *page_data; 3109 unsigned offset = pos & (PAGE_SIZE - 1); 3110 unsigned int xid; 3111 3112 xid = get_xid(); 3113 /* this is probably better than directly calling 3114 partialpage_write since in this function the file handle is 3115 known which we might as well leverage */ 3116 /* BB check if anything else missing out of ppw 3117 such as updating last write time */ 3118 page_data = kmap(page); 3119 rc = cifs_write(cfile, pid, 
page_data + offset, copied, &pos); 3120 /* if (rc < 0) should we set writebehind rc? */ 3121 kunmap(page); 3122 3123 free_xid(xid); 3124 } else { 3125 rc = copied; 3126 pos += copied; 3127 set_page_dirty(page); 3128 } 3129 3130 if (rc > 0) { 3131 spin_lock(&inode->i_lock); 3132 if (pos > inode->i_size) { 3133 loff_t additional_blocks = (512 - 1 + copied) >> 9; 3134 3135 i_size_write(inode, pos); 3136 /* 3137 * Estimate new allocation size based on the amount written. 3138 * This will be updated from server on close (and on queryinfo) 3139 */ 3140 inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9, 3141 inode->i_blocks + additional_blocks); 3142 } 3143 spin_unlock(&inode->i_lock); 3144 } 3145 3146 unlock_page(page); 3147 put_page(page); 3148 /* Indication to update ctime and mtime as close is deferred */ 3149 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3150 3151 return rc; 3152 } 3153 3154 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, 3155 int datasync) 3156 { 3157 unsigned int xid; 3158 int rc = 0; 3159 struct cifs_tcon *tcon; 3160 struct TCP_Server_Info *server; 3161 struct cifsFileInfo *smbfile = file->private_data; 3162 struct inode *inode = file_inode(file); 3163 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3164 3165 rc = file_write_and_wait_range(file, start, end); 3166 if (rc) { 3167 trace_cifs_fsync_err(inode->i_ino, rc); 3168 return rc; 3169 } 3170 3171 xid = get_xid(); 3172 3173 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3174 file, datasync); 3175 3176 if (!CIFS_CACHE_READ(CIFS_I(inode))) { 3177 rc = cifs_zap_mapping(inode); 3178 if (rc) { 3179 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); 3180 rc = 0; /* don't care about it in fsync */ 3181 } 3182 } 3183 3184 tcon = tlink_tcon(smbfile->tlink); 3185 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3186 server = tcon->ses->server; 3187 if (server->ops->flush == NULL) { 3188 rc = -ENOSYS; 3189 goto strict_fsync_exit; 3190 } 3191 3192 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3193 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3194 if (smbfile) { 3195 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3196 cifsFileInfo_put(smbfile); 3197 } else 3198 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3199 } else 3200 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3201 } 3202 3203 strict_fsync_exit: 3204 free_xid(xid); 3205 return rc; 3206 } 3207 3208 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 3209 { 3210 unsigned int xid; 3211 int rc = 0; 3212 struct cifs_tcon *tcon; 3213 struct TCP_Server_Info *server; 3214 struct cifsFileInfo *smbfile = file->private_data; 3215 struct inode *inode = file_inode(file); 3216 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); 3217 3218 rc = file_write_and_wait_range(file, start, end); 3219 if (rc) { 3220 trace_cifs_fsync_err(file_inode(file)->i_ino, rc); 3221 return rc; 3222 } 3223 3224 xid = get_xid(); 3225 3226 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3227 file, datasync); 3228 3229 tcon = tlink_tcon(smbfile->tlink); 3230 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3231 server = tcon->ses->server; 3232 if (server->ops->flush == NULL) { 3233 rc = -ENOSYS; 3234 goto fsync_exit; 3235 } 3236 3237 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3238 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3239 if (smbfile) { 3240 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3241 cifsFileInfo_put(smbfile); 3242 } else 3243 
cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3244 } else 3245 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3246 } 3247 3248 fsync_exit: 3249 free_xid(xid); 3250 return rc; 3251 } 3252 3253 /* 3254 * As file closes, flush all cached write data for this inode checking 3255 * for write behind errors. 3256 */ 3257 int cifs_flush(struct file *file, fl_owner_t id) 3258 { 3259 struct inode *inode = file_inode(file); 3260 int rc = 0; 3261 3262 if (file->f_mode & FMODE_WRITE) 3263 rc = filemap_write_and_wait(inode->i_mapping); 3264 3265 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); 3266 if (rc) { 3267 /* get more nuanced writeback errors */ 3268 rc = filemap_check_wb_err(file->f_mapping, 0); 3269 trace_cifs_flush_err(inode->i_ino, rc); 3270 } 3271 return rc; 3272 } 3273 3274 static void 3275 cifs_uncached_writedata_release(struct kref *refcount) 3276 { 3277 struct cifs_writedata *wdata = container_of(refcount, 3278 struct cifs_writedata, refcount); 3279 3280 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); 3281 cifs_writedata_release(refcount); 3282 } 3283 3284 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); 3285 3286 static void 3287 cifs_uncached_writev_complete(struct work_struct *work) 3288 { 3289 struct cifs_writedata *wdata = container_of(work, 3290 struct cifs_writedata, work); 3291 struct inode *inode = d_inode(wdata->cfile->dentry); 3292 struct cifsInodeInfo *cifsi = CIFS_I(inode); 3293 3294 spin_lock(&inode->i_lock); 3295 cifs_update_eof(cifsi, wdata->offset, wdata->bytes); 3296 if (cifsi->netfs.remote_i_size > inode->i_size) 3297 i_size_write(inode, cifsi->netfs.remote_i_size); 3298 spin_unlock(&inode->i_lock); 3299 3300 complete(&wdata->done); 3301 collect_uncached_write_data(wdata->ctx); 3302 /* the below call can possibly free the last ref to aio ctx */ 3303 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3304 } 3305 3306 static int 3307 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, 3308 struct cifs_aio_ctx *ctx) 3309 { 3310 unsigned int wsize; 3311 struct cifs_credits credits; 3312 int rc; 3313 struct TCP_Server_Info *server = wdata->server; 3314 3315 do { 3316 if (wdata->cfile->invalidHandle) { 3317 rc = cifs_reopen_file(wdata->cfile, false); 3318 if (rc == -EAGAIN) 3319 continue; 3320 else if (rc) 3321 break; 3322 } 3323 3324 3325 /* 3326 * Wait for credits to resend this wdata. 
3327 * Note: we are attempting to resend the whole wdata not in 3328 * segments 3329 */ 3330 do { 3331 rc = server->ops->wait_mtu_credits(server, wdata->bytes, 3332 &wsize, &credits); 3333 if (rc) 3334 goto fail; 3335 3336 if (wsize < wdata->bytes) { 3337 add_credits_and_wake_if(server, &credits, 0); 3338 msleep(1000); 3339 } 3340 } while (wsize < wdata->bytes); 3341 wdata->credits = credits; 3342 3343 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3344 3345 if (!rc) { 3346 if (wdata->cfile->invalidHandle) 3347 rc = -EAGAIN; 3348 else { 3349 wdata->replay = true; 3350 #ifdef CONFIG_CIFS_SMB_DIRECT 3351 if (wdata->mr) { 3352 wdata->mr->need_invalidate = true; 3353 smbd_deregister_mr(wdata->mr); 3354 wdata->mr = NULL; 3355 } 3356 #endif 3357 rc = server->ops->async_writev(wdata, 3358 cifs_uncached_writedata_release); 3359 } 3360 } 3361 3362 /* If the write was successfully sent, we are done */ 3363 if (!rc) { 3364 list_add_tail(&wdata->list, wdata_list); 3365 return 0; 3366 } 3367 3368 /* Roll back credits and retry if needed */ 3369 add_credits_and_wake_if(server, &wdata->credits, 0); 3370 } while (rc == -EAGAIN); 3371 3372 fail: 3373 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3374 return rc; 3375 } 3376 3377 /* 3378 * Select span of a bvec iterator we're going to use. Limit it by both maximum 3379 * size and maximum number of segments. 3380 */ 3381 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 3382 size_t max_segs, unsigned int *_nsegs) 3383 { 3384 const struct bio_vec *bvecs = iter->bvec; 3385 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 3386 size_t len, span = 0, n = iter->count; 3387 size_t skip = iter->iov_offset; 3388 3389 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 3390 return 0; 3391 3392 while (n && ix < nbv && skip) { 3393 len = bvecs[ix].bv_len; 3394 if (skip < len) 3395 break; 3396 skip -= len; 3397 n -= len; 3398 ix++; 3399 } 3400 3401 while (n && ix < nbv) { 3402 len = min3(n, bvecs[ix].bv_len - skip, max_size); 3403 span += len; 3404 max_size -= len; 3405 nsegs++; 3406 ix++; 3407 if (max_size == 0 || nsegs >= max_segs) 3408 break; 3409 skip = 0; 3410 n -= len; 3411 } 3412 3413 *_nsegs = nsegs; 3414 return span; 3415 } 3416 3417 static int 3418 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 3419 struct cifsFileInfo *open_file, 3420 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 3421 struct cifs_aio_ctx *ctx) 3422 { 3423 int rc = 0; 3424 size_t cur_len, max_len; 3425 struct cifs_writedata *wdata; 3426 pid_t pid; 3427 struct TCP_Server_Info *server; 3428 unsigned int xid, max_segs = INT_MAX; 3429 3430 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3431 pid = open_file->pid; 3432 else 3433 pid = current->tgid; 3434 3435 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3436 xid = get_xid(); 3437 3438 #ifdef CONFIG_CIFS_SMB_DIRECT 3439 if (server->smbd_conn) 3440 max_segs = server->smbd_conn->max_frmr_depth; 3441 #endif 3442 3443 do { 3444 struct cifs_credits credits_on_stack; 3445 struct cifs_credits *credits = &credits_on_stack; 3446 unsigned int wsize, nsegs = 0; 3447 3448 if (signal_pending(current)) { 3449 rc = -EINTR; 3450 break; 3451 } 3452 3453 if (open_file->invalidHandle) { 3454 rc = cifs_reopen_file(open_file, false); 3455 if (rc == -EAGAIN) 3456 continue; 3457 else if (rc) 3458 break; 3459 } 3460 3461 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 3462 &wsize, credits); 3463 if (rc) 3464 break; 3465 3466 max_len = 
min_t(const size_t, len, wsize); 3467 if (!max_len) { 3468 rc = -EAGAIN; 3469 add_credits_and_wake_if(server, credits, 0); 3470 break; 3471 } 3472 3473 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); 3474 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3475 cur_len, max_len, nsegs, from->nr_segs, max_segs); 3476 if (cur_len == 0) { 3477 rc = -EIO; 3478 add_credits_and_wake_if(server, credits, 0); 3479 break; 3480 } 3481 3482 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); 3483 if (!wdata) { 3484 rc = -ENOMEM; 3485 add_credits_and_wake_if(server, credits, 0); 3486 break; 3487 } 3488 3489 wdata->sync_mode = WB_SYNC_ALL; 3490 wdata->offset = (__u64)fpos; 3491 wdata->cfile = cifsFileInfo_get(open_file); 3492 wdata->server = server; 3493 wdata->pid = pid; 3494 wdata->bytes = cur_len; 3495 wdata->credits = credits_on_stack; 3496 wdata->iter = *from; 3497 wdata->ctx = ctx; 3498 kref_get(&ctx->refcount); 3499 3500 iov_iter_truncate(&wdata->iter, cur_len); 3501 3502 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3503 3504 if (!rc) { 3505 if (wdata->cfile->invalidHandle) 3506 rc = -EAGAIN; 3507 else 3508 rc = server->ops->async_writev(wdata, 3509 cifs_uncached_writedata_release); 3510 } 3511 3512 if (rc) { 3513 add_credits_and_wake_if(server, &wdata->credits, 0); 3514 kref_put(&wdata->refcount, 3515 cifs_uncached_writedata_release); 3516 if (rc == -EAGAIN) 3517 continue; 3518 break; 3519 } 3520 3521 list_add_tail(&wdata->list, wdata_list); 3522 iov_iter_advance(from, cur_len); 3523 fpos += cur_len; 3524 len -= cur_len; 3525 } while (len > 0); 3526 3527 free_xid(xid); 3528 return rc; 3529 } 3530 3531 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) 3532 { 3533 struct cifs_writedata *wdata, *tmp; 3534 struct cifs_tcon *tcon; 3535 struct cifs_sb_info *cifs_sb; 3536 struct dentry *dentry = ctx->cfile->dentry; 3537 ssize_t rc; 3538 3539 tcon = tlink_tcon(ctx->cfile->tlink); 3540 cifs_sb = CIFS_SB(dentry->d_sb); 3541 3542 mutex_lock(&ctx->aio_mutex); 3543 3544 if (list_empty(&ctx->list)) { 3545 mutex_unlock(&ctx->aio_mutex); 3546 return; 3547 } 3548 3549 rc = ctx->rc; 3550 /* 3551 * Wait for and collect replies for any successful sends in order of 3552 * increasing offset. Once an error is hit, then return without waiting 3553 * for any more replies. 
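	 * A wdata that completed with -EAGAIN is resent first (via
	 * cifs_resend_wdata() for direct I/O, otherwise by rebuilding the
	 * request with cifs_write_from_iter()) before the scan restarts.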
3554 */ 3555 restart_loop: 3556 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { 3557 if (!rc) { 3558 if (!try_wait_for_completion(&wdata->done)) { 3559 mutex_unlock(&ctx->aio_mutex); 3560 return; 3561 } 3562 3563 if (wdata->result) 3564 rc = wdata->result; 3565 else 3566 ctx->total_len += wdata->bytes; 3567 3568 /* resend call if it's a retryable error */ 3569 if (rc == -EAGAIN) { 3570 struct list_head tmp_list; 3571 struct iov_iter tmp_from = ctx->iter; 3572 3573 INIT_LIST_HEAD(&tmp_list); 3574 list_del_init(&wdata->list); 3575 3576 if (ctx->direct_io) 3577 rc = cifs_resend_wdata( 3578 wdata, &tmp_list, ctx); 3579 else { 3580 iov_iter_advance(&tmp_from, 3581 wdata->offset - ctx->pos); 3582 3583 rc = cifs_write_from_iter(wdata->offset, 3584 wdata->bytes, &tmp_from, 3585 ctx->cfile, cifs_sb, &tmp_list, 3586 ctx); 3587 3588 kref_put(&wdata->refcount, 3589 cifs_uncached_writedata_release); 3590 } 3591 3592 list_splice(&tmp_list, &ctx->list); 3593 goto restart_loop; 3594 } 3595 } 3596 list_del_init(&wdata->list); 3597 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3598 } 3599 3600 cifs_stats_bytes_written(tcon, ctx->total_len); 3601 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 3602 3603 ctx->rc = (rc == 0) ? ctx->total_len : rc; 3604 3605 mutex_unlock(&ctx->aio_mutex); 3606 3607 if (ctx->iocb && ctx->iocb->ki_complete) 3608 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3609 else 3610 complete(&ctx->done); 3611 } 3612 3613 static ssize_t __cifs_writev( 3614 struct kiocb *iocb, struct iov_iter *from, bool direct) 3615 { 3616 struct file *file = iocb->ki_filp; 3617 ssize_t total_written = 0; 3618 struct cifsFileInfo *cfile; 3619 struct cifs_tcon *tcon; 3620 struct cifs_sb_info *cifs_sb; 3621 struct cifs_aio_ctx *ctx; 3622 int rc; 3623 3624 rc = generic_write_checks(iocb, from); 3625 if (rc <= 0) 3626 return rc; 3627 3628 cifs_sb = CIFS_FILE_SB(file); 3629 cfile = file->private_data; 3630 tcon = tlink_tcon(cfile->tlink); 3631 3632 if (!tcon->ses->server->ops->async_writev) 3633 return -ENOSYS; 3634 3635 ctx = cifs_aio_ctx_alloc(); 3636 if (!ctx) 3637 return -ENOMEM; 3638 3639 ctx->cfile = cifsFileInfo_get(cfile); 3640 3641 if (!is_sync_kiocb(iocb)) 3642 ctx->iocb = iocb; 3643 3644 ctx->pos = iocb->ki_pos; 3645 ctx->direct_io = direct; 3646 ctx->nr_pinned_pages = 0; 3647 3648 if (user_backed_iter(from)) { 3649 /* 3650 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3651 * they contain references to the calling process's virtual 3652 * memory layout which won't be available in an async worker 3653 * thread. This also takes a pin on every folio involved. 3654 */ 3655 rc = netfs_extract_user_iter(from, iov_iter_count(from), 3656 &ctx->iter, 0); 3657 if (rc < 0) { 3658 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3659 return rc; 3660 } 3661 3662 ctx->nr_pinned_pages = rc; 3663 ctx->bv = (void *)ctx->iter.bvec; 3664 ctx->bv_need_unpin = iov_iter_extract_will_pin(from); 3665 } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && 3666 !is_sync_kiocb(iocb)) { 3667 /* 3668 * If the op is asynchronous, we need to copy the list attached 3669 * to a BVEC/KVEC-type iterator, but we assume that the storage 3670 * will be pinned by the caller; in any case, we may or may not 3671 * be able to pin the pages, so we don't try. 
3672 */ 3673 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); 3674 if (!ctx->bv) { 3675 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3676 return -ENOMEM; 3677 } 3678 } else { 3679 /* 3680 * Otherwise, we just pass the iterator down as-is and rely on 3681 * the caller to make sure the pages referred to by the 3682 * iterator don't evaporate. 3683 */ 3684 ctx->iter = *from; 3685 } 3686 3687 ctx->len = iov_iter_count(&ctx->iter); 3688 3689 /* grab a lock here due to read response handlers can access ctx */ 3690 mutex_lock(&ctx->aio_mutex); 3691 3692 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, 3693 cfile, cifs_sb, &ctx->list, ctx); 3694 3695 /* 3696 * If at least one write was successfully sent, then discard any rc 3697 * value from the later writes. If the other write succeeds, then 3698 * we'll end up returning whatever was written. If it fails, then 3699 * we'll get a new rc value from that. 3700 */ 3701 if (!list_empty(&ctx->list)) 3702 rc = 0; 3703 3704 mutex_unlock(&ctx->aio_mutex); 3705 3706 if (rc) { 3707 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3708 return rc; 3709 } 3710 3711 if (!is_sync_kiocb(iocb)) { 3712 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3713 return -EIOCBQUEUED; 3714 } 3715 3716 rc = wait_for_completion_killable(&ctx->done); 3717 if (rc) { 3718 mutex_lock(&ctx->aio_mutex); 3719 ctx->rc = rc = -EINTR; 3720 total_written = ctx->total_len; 3721 mutex_unlock(&ctx->aio_mutex); 3722 } else { 3723 rc = ctx->rc; 3724 total_written = ctx->total_len; 3725 } 3726 3727 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3728 3729 if (unlikely(!total_written)) 3730 return rc; 3731 3732 iocb->ki_pos += total_written; 3733 return total_written; 3734 } 3735 3736 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) 3737 { 3738 struct file *file = iocb->ki_filp; 3739 3740 cifs_revalidate_mapping(file->f_inode); 3741 return __cifs_writev(iocb, from, true); 3742 } 3743 3744 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 3745 { 3746 return __cifs_writev(iocb, from, false); 3747 } 3748 3749 static ssize_t 3750 cifs_writev(struct kiocb *iocb, struct iov_iter *from) 3751 { 3752 struct file *file = iocb->ki_filp; 3753 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 3754 struct inode *inode = file->f_mapping->host; 3755 struct cifsInodeInfo *cinode = CIFS_I(inode); 3756 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 3757 ssize_t rc; 3758 3759 inode_lock(inode); 3760 /* 3761 * We need to hold the sem to be sure nobody modifies lock list 3762 * with a brlock that prevents writing. 
3763 */ 3764 down_read(&cinode->lock_sem); 3765 3766 rc = generic_write_checks(iocb, from); 3767 if (rc <= 0) 3768 goto out; 3769 3770 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), 3771 server->vals->exclusive_lock_type, 0, 3772 NULL, CIFS_WRITE_OP)) 3773 rc = __generic_file_write_iter(iocb, from); 3774 else 3775 rc = -EACCES; 3776 out: 3777 up_read(&cinode->lock_sem); 3778 inode_unlock(inode); 3779 3780 if (rc > 0) 3781 rc = generic_write_sync(iocb, rc); 3782 return rc; 3783 } 3784 3785 ssize_t 3786 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) 3787 { 3788 struct inode *inode = file_inode(iocb->ki_filp); 3789 struct cifsInodeInfo *cinode = CIFS_I(inode); 3790 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3791 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 3792 iocb->ki_filp->private_data; 3793 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3794 ssize_t written; 3795 3796 written = cifs_get_writer(cinode); 3797 if (written) 3798 return written; 3799 3800 if (CIFS_CACHE_WRITE(cinode)) { 3801 if (cap_unix(tcon->ses) && 3802 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 3803 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 3804 written = generic_file_write_iter(iocb, from); 3805 goto out; 3806 } 3807 written = cifs_writev(iocb, from); 3808 goto out; 3809 } 3810 /* 3811 * For non-oplocked files in strict cache mode we need to write the data 3812 * to the server exactly from the pos to pos+len-1 rather than flush all 3813 * affected pages because it may cause a error with mandatory locks on 3814 * these pages but not on the region from pos to ppos+len-1. 3815 */ 3816 written = cifs_user_writev(iocb, from); 3817 if (CIFS_CACHE_READ(cinode)) { 3818 /* 3819 * We have read level caching and we have just sent a write 3820 * request to the server thus making data in the cache stale. 3821 * Zap the cache and set oplock/lease level to NONE to avoid 3822 * reading stale data from the cache. All subsequent read 3823 * operations will read new data from the server. 
3824 */ 3825 cifs_zap_mapping(inode); 3826 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", 3827 inode); 3828 cinode->oplock = 0; 3829 } 3830 out: 3831 cifs_put_writer(cinode); 3832 return written; 3833 } 3834 3835 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) 3836 { 3837 struct cifs_readdata *rdata; 3838 3839 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); 3840 if (rdata) { 3841 kref_init(&rdata->refcount); 3842 INIT_LIST_HEAD(&rdata->list); 3843 init_completion(&rdata->done); 3844 INIT_WORK(&rdata->work, complete); 3845 } 3846 3847 return rdata; 3848 } 3849 3850 void 3851 cifs_readdata_release(struct kref *refcount) 3852 { 3853 struct cifs_readdata *rdata = container_of(refcount, 3854 struct cifs_readdata, refcount); 3855 3856 if (rdata->ctx) 3857 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); 3858 #ifdef CONFIG_CIFS_SMB_DIRECT 3859 if (rdata->mr) { 3860 smbd_deregister_mr(rdata->mr); 3861 rdata->mr = NULL; 3862 } 3863 #endif 3864 if (rdata->cfile) 3865 cifsFileInfo_put(rdata->cfile); 3866 3867 kfree(rdata); 3868 } 3869 3870 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); 3871 3872 static void 3873 cifs_uncached_readv_complete(struct work_struct *work) 3874 { 3875 struct cifs_readdata *rdata = container_of(work, 3876 struct cifs_readdata, work); 3877 3878 complete(&rdata->done); 3879 collect_uncached_read_data(rdata->ctx); 3880 /* the below call can possibly free the last ref to aio ctx */ 3881 kref_put(&rdata->refcount, cifs_readdata_release); 3882 } 3883 3884 static int cifs_resend_rdata(struct cifs_readdata *rdata, 3885 struct list_head *rdata_list, 3886 struct cifs_aio_ctx *ctx) 3887 { 3888 unsigned int rsize; 3889 struct cifs_credits credits; 3890 int rc; 3891 struct TCP_Server_Info *server; 3892 3893 /* XXX: should we pick a new channel here? */ 3894 server = rdata->server; 3895 3896 do { 3897 if (rdata->cfile->invalidHandle) { 3898 rc = cifs_reopen_file(rdata->cfile, true); 3899 if (rc == -EAGAIN) 3900 continue; 3901 else if (rc) 3902 break; 3903 } 3904 3905 /* 3906 * Wait for credits to resend this rdata. 
3907 * Note: we are attempting to resend the whole rdata not in 3908 * segments 3909 */ 3910 do { 3911 rc = server->ops->wait_mtu_credits(server, rdata->bytes, 3912 &rsize, &credits); 3913 3914 if (rc) 3915 goto fail; 3916 3917 if (rsize < rdata->bytes) { 3918 add_credits_and_wake_if(server, &credits, 0); 3919 msleep(1000); 3920 } 3921 } while (rsize < rdata->bytes); 3922 rdata->credits = credits; 3923 3924 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 3925 if (!rc) { 3926 if (rdata->cfile->invalidHandle) 3927 rc = -EAGAIN; 3928 else { 3929 #ifdef CONFIG_CIFS_SMB_DIRECT 3930 if (rdata->mr) { 3931 rdata->mr->need_invalidate = true; 3932 smbd_deregister_mr(rdata->mr); 3933 rdata->mr = NULL; 3934 } 3935 #endif 3936 rc = server->ops->async_readv(rdata); 3937 } 3938 } 3939 3940 /* If the read was successfully sent, we are done */ 3941 if (!rc) { 3942 /* Add to aio pending list */ 3943 list_add_tail(&rdata->list, rdata_list); 3944 return 0; 3945 } 3946 3947 /* Roll back credits and retry if needed */ 3948 add_credits_and_wake_if(server, &rdata->credits, 0); 3949 } while (rc == -EAGAIN); 3950 3951 fail: 3952 kref_put(&rdata->refcount, cifs_readdata_release); 3953 return rc; 3954 } 3955 3956 static int 3957 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file, 3958 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, 3959 struct cifs_aio_ctx *ctx) 3960 { 3961 struct cifs_readdata *rdata; 3962 unsigned int rsize, nsegs, max_segs = INT_MAX; 3963 struct cifs_credits credits_on_stack; 3964 struct cifs_credits *credits = &credits_on_stack; 3965 size_t cur_len, max_len; 3966 int rc; 3967 pid_t pid; 3968 struct TCP_Server_Info *server; 3969 3970 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3971 3972 #ifdef CONFIG_CIFS_SMB_DIRECT 3973 if (server->smbd_conn) 3974 max_segs = server->smbd_conn->max_frmr_depth; 3975 #endif 3976 3977 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3978 pid = open_file->pid; 3979 else 3980 pid = current->tgid; 3981 3982 do { 3983 if (open_file->invalidHandle) { 3984 rc = cifs_reopen_file(open_file, true); 3985 if (rc == -EAGAIN) 3986 continue; 3987 else if (rc) 3988 break; 3989 } 3990 3991 if (cifs_sb->ctx->rsize == 0) 3992 cifs_sb->ctx->rsize = 3993 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 3994 cifs_sb->ctx); 3995 3996 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 3997 &rsize, credits); 3998 if (rc) 3999 break; 4000 4001 max_len = min_t(size_t, len, rsize); 4002 4003 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, 4004 max_segs, &nsegs); 4005 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", 4006 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); 4007 if (cur_len == 0) { 4008 rc = -EIO; 4009 add_credits_and_wake_if(server, credits, 0); 4010 break; 4011 } 4012 4013 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); 4014 if (!rdata) { 4015 add_credits_and_wake_if(server, credits, 0); 4016 rc = -ENOMEM; 4017 break; 4018 } 4019 4020 rdata->server = server; 4021 rdata->cfile = cifsFileInfo_get(open_file); 4022 rdata->offset = fpos; 4023 rdata->bytes = cur_len; 4024 rdata->pid = pid; 4025 rdata->credits = credits_on_stack; 4026 rdata->ctx = ctx; 4027 kref_get(&ctx->refcount); 4028 4029 rdata->iter = ctx->iter; 4030 iov_iter_truncate(&rdata->iter, cur_len); 4031 4032 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4033 4034 if (!rc) { 4035 if (rdata->cfile->invalidHandle) 4036 rc = -EAGAIN; 4037 else 4038 rc = 
server->ops->async_readv(rdata); 4039 } 4040 4041 if (rc) { 4042 add_credits_and_wake_if(server, &rdata->credits, 0); 4043 kref_put(&rdata->refcount, cifs_readdata_release); 4044 if (rc == -EAGAIN) 4045 continue; 4046 break; 4047 } 4048 4049 list_add_tail(&rdata->list, rdata_list); 4050 iov_iter_advance(&ctx->iter, cur_len); 4051 fpos += cur_len; 4052 len -= cur_len; 4053 } while (len > 0); 4054 4055 return rc; 4056 } 4057 4058 static void 4059 collect_uncached_read_data(struct cifs_aio_ctx *ctx) 4060 { 4061 struct cifs_readdata *rdata, *tmp; 4062 struct cifs_sb_info *cifs_sb; 4063 int rc; 4064 4065 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); 4066 4067 mutex_lock(&ctx->aio_mutex); 4068 4069 if (list_empty(&ctx->list)) { 4070 mutex_unlock(&ctx->aio_mutex); 4071 return; 4072 } 4073 4074 rc = ctx->rc; 4075 /* the loop below should proceed in the order of increasing offsets */ 4076 again: 4077 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { 4078 if (!rc) { 4079 if (!try_wait_for_completion(&rdata->done)) { 4080 mutex_unlock(&ctx->aio_mutex); 4081 return; 4082 } 4083 4084 if (rdata->result == -EAGAIN) { 4085 /* resend call if it's a retryable error */ 4086 struct list_head tmp_list; 4087 unsigned int got_bytes = rdata->got_bytes; 4088 4089 list_del_init(&rdata->list); 4090 INIT_LIST_HEAD(&tmp_list); 4091 4092 if (ctx->direct_io) { 4093 /* 4094 * Re-use rdata as this is a 4095 * direct I/O 4096 */ 4097 rc = cifs_resend_rdata( 4098 rdata, 4099 &tmp_list, ctx); 4100 } else { 4101 rc = cifs_send_async_read( 4102 rdata->offset + got_bytes, 4103 rdata->bytes - got_bytes, 4104 rdata->cfile, cifs_sb, 4105 &tmp_list, ctx); 4106 4107 kref_put(&rdata->refcount, 4108 cifs_readdata_release); 4109 } 4110 4111 list_splice(&tmp_list, &ctx->list); 4112 4113 goto again; 4114 } else if (rdata->result) 4115 rc = rdata->result; 4116 4117 /* if there was a short read -- discard anything left */ 4118 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) 4119 rc = -ENODATA; 4120 4121 ctx->total_len += rdata->got_bytes; 4122 } 4123 list_del_init(&rdata->list); 4124 kref_put(&rdata->refcount, cifs_readdata_release); 4125 } 4126 4127 /* mask nodata case */ 4128 if (rc == -ENODATA) 4129 rc = 0; 4130 4131 ctx->rc = (rc == 0) ? 
(ssize_t)ctx->total_len : rc; 4132 4133 mutex_unlock(&ctx->aio_mutex); 4134 4135 if (ctx->iocb && ctx->iocb->ki_complete) 4136 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 4137 else 4138 complete(&ctx->done); 4139 } 4140 4141 static ssize_t __cifs_readv( 4142 struct kiocb *iocb, struct iov_iter *to, bool direct) 4143 { 4144 size_t len; 4145 struct file *file = iocb->ki_filp; 4146 struct cifs_sb_info *cifs_sb; 4147 struct cifsFileInfo *cfile; 4148 struct cifs_tcon *tcon; 4149 ssize_t rc, total_read = 0; 4150 loff_t offset = iocb->ki_pos; 4151 struct cifs_aio_ctx *ctx; 4152 4153 len = iov_iter_count(to); 4154 if (!len) 4155 return 0; 4156 4157 cifs_sb = CIFS_FILE_SB(file); 4158 cfile = file->private_data; 4159 tcon = tlink_tcon(cfile->tlink); 4160 4161 if (!tcon->ses->server->ops->async_readv) 4162 return -ENOSYS; 4163 4164 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4165 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4166 4167 ctx = cifs_aio_ctx_alloc(); 4168 if (!ctx) 4169 return -ENOMEM; 4170 4171 ctx->pos = offset; 4172 ctx->direct_io = direct; 4173 ctx->len = len; 4174 ctx->cfile = cifsFileInfo_get(cfile); 4175 ctx->nr_pinned_pages = 0; 4176 4177 if (!is_sync_kiocb(iocb)) 4178 ctx->iocb = iocb; 4179 4180 if (user_backed_iter(to)) { 4181 /* 4182 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 4183 * they contain references to the calling process's virtual 4184 * memory layout which won't be available in an async worker 4185 * thread. This also takes a pin on every folio involved. 4186 */ 4187 rc = netfs_extract_user_iter(to, iov_iter_count(to), 4188 &ctx->iter, 0); 4189 if (rc < 0) { 4190 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4191 return rc; 4192 } 4193 4194 ctx->nr_pinned_pages = rc; 4195 ctx->bv = (void *)ctx->iter.bvec; 4196 ctx->bv_need_unpin = iov_iter_extract_will_pin(to); 4197 ctx->should_dirty = true; 4198 } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && 4199 !is_sync_kiocb(iocb)) { 4200 /* 4201 * If the op is asynchronous, we need to copy the list attached 4202 * to a BVEC/KVEC-type iterator, but we assume that the storage 4203 * will be retained by the caller; in any case, we may or may 4204 * not be able to pin the pages, so we don't try. 4205 */ 4206 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); 4207 if (!ctx->bv) { 4208 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4209 return -ENOMEM; 4210 } 4211 } else { 4212 /* 4213 * Otherwise, we just pass the iterator down as-is and rely on 4214 * the caller to make sure the pages referred to by the 4215 * iterator don't evaporate. 
		 */
		ctx->iter = *to;
	}

	if (direct) {
		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
						  offset, offset + len - 1);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return -EAGAIN;
		}
	}

	/* grab a lock here because read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request was sent successfully, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}

ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}

ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}

ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
		iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with page reading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
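	 * Readers of the byte-range lock list take lock_sem shared here;
	 * paths that add or remove brlocks take it exclusively.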
4310 */ 4311 down_read(&cinode->lock_sem); 4312 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), 4313 tcon->ses->server->vals->shared_lock_type, 4314 0, NULL, CIFS_READ_OP)) 4315 rc = generic_file_read_iter(iocb, to); 4316 up_read(&cinode->lock_sem); 4317 return rc; 4318 } 4319 4320 static ssize_t 4321 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) 4322 { 4323 int rc = -EACCES; 4324 unsigned int bytes_read = 0; 4325 unsigned int total_read; 4326 unsigned int current_read_size; 4327 unsigned int rsize; 4328 struct cifs_sb_info *cifs_sb; 4329 struct cifs_tcon *tcon; 4330 struct TCP_Server_Info *server; 4331 unsigned int xid; 4332 char *cur_offset; 4333 struct cifsFileInfo *open_file; 4334 struct cifs_io_parms io_parms = {0}; 4335 int buf_type = CIFS_NO_BUFFER; 4336 __u32 pid; 4337 4338 xid = get_xid(); 4339 cifs_sb = CIFS_FILE_SB(file); 4340 4341 /* FIXME: set up handlers for larger reads and/or convert to async */ 4342 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); 4343 4344 if (file->private_data == NULL) { 4345 rc = -EBADF; 4346 free_xid(xid); 4347 return rc; 4348 } 4349 open_file = file->private_data; 4350 tcon = tlink_tcon(open_file->tlink); 4351 server = cifs_pick_channel(tcon->ses); 4352 4353 if (!server->ops->sync_read) { 4354 free_xid(xid); 4355 return -ENOSYS; 4356 } 4357 4358 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4359 pid = open_file->pid; 4360 else 4361 pid = current->tgid; 4362 4363 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4364 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4365 4366 for (total_read = 0, cur_offset = read_data; read_size > total_read; 4367 total_read += bytes_read, cur_offset += bytes_read) { 4368 do { 4369 current_read_size = min_t(uint, read_size - total_read, 4370 rsize); 4371 /* 4372 * For windows me and 9x we do not want to request more 4373 * than it negotiated since it will refuse the read 4374 * then. 4375 */ 4376 if (!(tcon->ses->capabilities & 4377 tcon->ses->server->vals->cap_large_files)) { 4378 current_read_size = min_t(uint, 4379 current_read_size, CIFSMaxBufSize); 4380 } 4381 if (open_file->invalidHandle) { 4382 rc = cifs_reopen_file(open_file, true); 4383 if (rc != 0) 4384 break; 4385 } 4386 io_parms.pid = pid; 4387 io_parms.tcon = tcon; 4388 io_parms.offset = *offset; 4389 io_parms.length = current_read_size; 4390 io_parms.server = server; 4391 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 4392 &bytes_read, &cur_offset, 4393 &buf_type); 4394 } while (rc == -EAGAIN); 4395 4396 if (rc || (bytes_read == 0)) { 4397 if (total_read) { 4398 break; 4399 } else { 4400 free_xid(xid); 4401 return rc; 4402 } 4403 } else { 4404 cifs_stats_bytes_read(tcon, total_read); 4405 *offset += bytes_read; 4406 } 4407 } 4408 free_xid(xid); 4409 return total_read; 4410 } 4411 4412 /* 4413 * If the page is mmap'ed into a process' page tables, then we need to make 4414 * sure that it doesn't change while being written back. 4415 */ 4416 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 4417 { 4418 struct folio *folio = page_folio(vmf->page); 4419 4420 /* Wait for the folio to be written to the cache before we allow it to 4421 * be modified. We then assume the entire folio will need writing back. 
 */
#ifdef CONFIG_CIFS_FSCACHE
	if (folio_test_fscache(folio) &&
	    folio_wait_fscache_killable(folio) < 0)
		return VM_FAULT_RETRY;
#endif

	folio_wait_writeback(folio);

	if (folio_lock_killable(folio) < 0)
		return VM_FAULT_RETRY;
	return VM_FAULT_LOCKED;
}

static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};

int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int xid, rc = 0;
	struct inode *inode = file_inode(file);

	xid = get_xid();

	if (!CIFS_CACHE_READ(CIFS_I(inode)))
		rc = cifs_zap_mapping(inode);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;

	free_xid(xid);
	return rc;
}

int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

	xid = get_xid();

	rc = cifs_revalidate_file(file);
	if (rc)
		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
			 rc);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;

	free_xid(xid);
	return rc;
}

/*
 * Unlock a bunch of folios in the pagecache.
 */
static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
{
	struct folio *folio;
	XA_STATE(xas, &mapping->i_pages, first);

	rcu_read_lock();
	xas_for_each(&xas, folio, last) {
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void cifs_readahead_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						   struct cifs_readdata, work);
	struct folio *folio;
	pgoff_t last;
	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);

	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);

	if (good)
		cifs_readahead_to_fscache(rdata->mapping->host,
					  rdata->offset, rdata->bytes);

	if (iov_iter_count(&rdata->iter) > 0)
		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);

	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;

	rcu_read_lock();
	xas_for_each(&xas, folio, last) {
		if (good) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}
		folio_unlock(folio);
	}
	rcu_read_unlock();

	kref_put(&rdata->refcount, cifs_readdata_release);
}

static void cifs_readahead(struct readahead_control *ractl)
{
	struct cifsFileInfo *open_file = ractl->file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
	struct TCP_Server_Info *server;
	unsigned int xid, nr_pages, cache_nr_pages = 0;
	unsigned int ra_pages;
	pgoff_t next_cached = ULONG_MAX, ra_index;
	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
	bool check_cache = caching;
	pid_t pid;
	int rc = 0;

	/* Note that readahead_count() lags behind our dequeuing of pages from
	 * the ractl, so we have to keep track for ourselves.
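	 * ra_pages and ra_index below carry the remaining page count and the
	 * index of the next page to read.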
4542 */ 4543 ra_pages = readahead_count(ractl); 4544 ra_index = readahead_index(ractl); 4545 4546 xid = get_xid(); 4547 4548 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4549 pid = open_file->pid; 4550 else 4551 pid = current->tgid; 4552 4553 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4554 4555 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 4556 __func__, ractl->file, ractl->mapping, ra_pages); 4557 4558 /* 4559 * Chop the readahead request up into rsize-sized read requests. 4560 */ 4561 while ((nr_pages = ra_pages)) { 4562 unsigned int i, rsize; 4563 struct cifs_readdata *rdata; 4564 struct cifs_credits credits_on_stack; 4565 struct cifs_credits *credits = &credits_on_stack; 4566 struct folio *folio; 4567 pgoff_t fsize; 4568 4569 /* 4570 * Find out if we have anything cached in the range of 4571 * interest, and if so, where the next chunk of cached data is. 4572 */ 4573 if (caching) { 4574 if (check_cache) { 4575 rc = cifs_fscache_query_occupancy( 4576 ractl->mapping->host, ra_index, nr_pages, 4577 &next_cached, &cache_nr_pages); 4578 if (rc < 0) 4579 caching = false; 4580 check_cache = false; 4581 } 4582 4583 if (ra_index == next_cached) { 4584 /* 4585 * TODO: Send a whole batch of pages to be read 4586 * by the cache. 4587 */ 4588 folio = readahead_folio(ractl); 4589 fsize = folio_nr_pages(folio); 4590 ra_pages -= fsize; 4591 ra_index += fsize; 4592 if (cifs_readpage_from_fscache(ractl->mapping->host, 4593 &folio->page) < 0) { 4594 /* 4595 * TODO: Deal with cache read failure 4596 * here, but for the moment, delegate 4597 * that to readpage. 4598 */ 4599 caching = false; 4600 } 4601 folio_unlock(folio); 4602 next_cached += fsize; 4603 cache_nr_pages -= fsize; 4604 if (cache_nr_pages == 0) 4605 check_cache = true; 4606 continue; 4607 } 4608 } 4609 4610 if (open_file->invalidHandle) { 4611 rc = cifs_reopen_file(open_file, true); 4612 if (rc) { 4613 if (rc == -EAGAIN) 4614 continue; 4615 break; 4616 } 4617 } 4618 4619 if (cifs_sb->ctx->rsize == 0) 4620 cifs_sb->ctx->rsize = 4621 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4622 cifs_sb->ctx); 4623 4624 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4625 &rsize, credits); 4626 if (rc) 4627 break; 4628 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); 4629 if (next_cached != ULONG_MAX) 4630 nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); 4631 4632 /* 4633 * Give up immediately if rsize is too small to read an entire 4634 * page. The VFS will fall back to readpage. We should never 4635 * reach this point however since we set ra_pages to 0 when the 4636 * rsize is smaller than a cache page. 
4637 */ 4638 if (unlikely(!nr_pages)) { 4639 add_credits_and_wake_if(server, credits, 0); 4640 break; 4641 } 4642 4643 rdata = cifs_readdata_alloc(cifs_readahead_complete); 4644 if (!rdata) { 4645 /* best to give up if we're out of mem */ 4646 add_credits_and_wake_if(server, credits, 0); 4647 break; 4648 } 4649 4650 rdata->offset = ra_index * PAGE_SIZE; 4651 rdata->bytes = nr_pages * PAGE_SIZE; 4652 rdata->cfile = cifsFileInfo_get(open_file); 4653 rdata->server = server; 4654 rdata->mapping = ractl->mapping; 4655 rdata->pid = pid; 4656 rdata->credits = credits_on_stack; 4657 4658 for (i = 0; i < nr_pages; i++) { 4659 if (!readahead_folio(ractl)) 4660 WARN_ON(1); 4661 } 4662 ra_pages -= nr_pages; 4663 ra_index += nr_pages; 4664 4665 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, 4666 rdata->offset, rdata->bytes); 4667 4668 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4669 if (!rc) { 4670 if (rdata->cfile->invalidHandle) 4671 rc = -EAGAIN; 4672 else 4673 rc = server->ops->async_readv(rdata); 4674 } 4675 4676 if (rc) { 4677 add_credits_and_wake_if(server, &rdata->credits, 0); 4678 cifs_unlock_folios(rdata->mapping, 4679 rdata->offset / PAGE_SIZE, 4680 (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); 4681 /* Fallback to the readpage in error/reconnect cases */ 4682 kref_put(&rdata->refcount, cifs_readdata_release); 4683 break; 4684 } 4685 4686 kref_put(&rdata->refcount, cifs_readdata_release); 4687 } 4688 4689 free_xid(xid); 4690 } 4691 4692 /* 4693 * cifs_readpage_worker must be called with the page pinned 4694 */ 4695 static int cifs_readpage_worker(struct file *file, struct page *page, 4696 loff_t *poffset) 4697 { 4698 struct inode *inode = file_inode(file); 4699 struct timespec64 atime, mtime; 4700 char *read_data; 4701 int rc; 4702 4703 /* Is the page cached? 
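	 * If so, cifs_readpage_from_fscache() fills it and we skip the read
	 * from the server.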
*/ 4704 rc = cifs_readpage_from_fscache(inode, page); 4705 if (rc == 0) 4706 goto read_complete; 4707 4708 read_data = kmap(page); 4709 /* for reads over a certain size could initiate async read ahead */ 4710 4711 rc = cifs_read(file, read_data, PAGE_SIZE, poffset); 4712 4713 if (rc < 0) 4714 goto io_error; 4715 else 4716 cifs_dbg(FYI, "Bytes read %d\n", rc); 4717 4718 /* we do not want atime to be less than mtime, it broke some apps */ 4719 atime = inode_set_atime_to_ts(inode, current_time(inode)); 4720 mtime = inode_get_mtime(inode); 4721 if (timespec64_compare(&atime, &mtime) < 0) 4722 inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 4723 4724 if (PAGE_SIZE > rc) 4725 memset(read_data + rc, 0, PAGE_SIZE - rc); 4726 4727 flush_dcache_page(page); 4728 SetPageUptodate(page); 4729 rc = 0; 4730 4731 io_error: 4732 kunmap(page); 4733 4734 read_complete: 4735 unlock_page(page); 4736 return rc; 4737 } 4738 4739 static int cifs_read_folio(struct file *file, struct folio *folio) 4740 { 4741 struct page *page = &folio->page; 4742 loff_t offset = page_file_offset(page); 4743 int rc = -EACCES; 4744 unsigned int xid; 4745 4746 xid = get_xid(); 4747 4748 if (file->private_data == NULL) { 4749 rc = -EBADF; 4750 free_xid(xid); 4751 return rc; 4752 } 4753 4754 cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", 4755 page, (int)offset, (int)offset); 4756 4757 rc = cifs_readpage_worker(file, page, &offset); 4758 4759 free_xid(xid); 4760 return rc; 4761 } 4762 4763 static int is_inode_writable(struct cifsInodeInfo *cifs_inode) 4764 { 4765 struct cifsFileInfo *open_file; 4766 4767 spin_lock(&cifs_inode->open_file_lock); 4768 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 4769 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 4770 spin_unlock(&cifs_inode->open_file_lock); 4771 return 1; 4772 } 4773 } 4774 spin_unlock(&cifs_inode->open_file_lock); 4775 return 0; 4776 } 4777 4778 /* We do not want to update the file size from server for inodes 4779 open for write - to avoid races with writepage extending 4780 the file - in the future we could consider allowing 4781 refreshing the inode only on increases in the file size 4782 but this is tricky to do without racing with writebehind 4783 page caching in the current Linux kernel design */ 4784 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file, 4785 bool from_readdir) 4786 { 4787 if (!cifsInode) 4788 return true; 4789 4790 if (is_inode_writable(cifsInode) || 4791 ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) { 4792 /* This inode is open for write at least once */ 4793 struct cifs_sb_info *cifs_sb; 4794 4795 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); 4796 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 4797 /* since no page cache to corrupt on directio 4798 we can change size safely */ 4799 return true; 4800 } 4801 4802 if (i_size_read(&cifsInode->netfs.inode) < end_of_file) 4803 return true; 4804 4805 return false; 4806 } else 4807 return true; 4808 } 4809 4810 static int cifs_write_begin(struct file *file, struct address_space *mapping, 4811 loff_t pos, unsigned len, 4812 struct page **pagep, void **fsdata) 4813 { 4814 int oncethru = 0; 4815 pgoff_t index = pos >> PAGE_SHIFT; 4816 loff_t offset = pos & (PAGE_SIZE - 1); 4817 loff_t page_start = pos & PAGE_MASK; 4818 loff_t i_size; 4819 struct page *page; 4820 int rc = 0; 4821 4822 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); 4823 4824 start: 4825 page = grab_cache_page_write_begin(mapping, index); 4826 if 
(!page) { 4827 rc = -ENOMEM; 4828 goto out; 4829 } 4830 4831 if (PageUptodate(page)) 4832 goto out; 4833 4834 /* 4835 * If we write a full page it will be up to date, no need to read from 4836 * the server. If the write is short, we'll end up doing a sync write 4837 * instead. 4838 */ 4839 if (len == PAGE_SIZE) 4840 goto out; 4841 4842 /* 4843 * optimize away the read when we have an oplock, and we're not 4844 * expecting to use any of the data we'd be reading in. That 4845 * is, when the page lies beyond the EOF, or straddles the EOF 4846 * and the write will cover all of the existing data. 4847 */ 4848 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { 4849 i_size = i_size_read(mapping->host); 4850 if (page_start >= i_size || 4851 (offset == 0 && (pos + len) >= i_size)) { 4852 zero_user_segments(page, 0, offset, 4853 offset + len, 4854 PAGE_SIZE); 4855 /* 4856 * PageChecked means that the parts of the page 4857 * to which we're not writing are considered up 4858 * to date. Once the data is copied to the 4859 * page, it can be set uptodate. 4860 */ 4861 SetPageChecked(page); 4862 goto out; 4863 } 4864 } 4865 4866 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { 4867 /* 4868 * might as well read a page, it is fast enough. If we get 4869 * an error, we don't need to return it. cifs_write_end will 4870 * do a sync write instead since PG_uptodate isn't set. 4871 */ 4872 cifs_readpage_worker(file, page, &page_start); 4873 put_page(page); 4874 oncethru = 1; 4875 goto start; 4876 } else { 4877 /* we could try using another file handle if there is one - 4878 but how would we lock it to prevent close of that handle 4879 racing with this read? In any case 4880 this will be written out by write_end so is fine */ 4881 } 4882 out: 4883 *pagep = page; 4884 return rc; 4885 } 4886 4887 static bool cifs_release_folio(struct folio *folio, gfp_t gfp) 4888 { 4889 if (folio_test_private(folio)) 4890 return 0; 4891 if (folio_test_fscache(folio)) { 4892 if (current_is_kswapd() || !(gfp & __GFP_FS)) 4893 return false; 4894 folio_wait_fscache(folio); 4895 } 4896 fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); 4897 return true; 4898 } 4899 4900 static void cifs_invalidate_folio(struct folio *folio, size_t offset, 4901 size_t length) 4902 { 4903 folio_wait_fscache(folio); 4904 } 4905 4906 static int cifs_launder_folio(struct folio *folio) 4907 { 4908 int rc = 0; 4909 loff_t range_start = folio_pos(folio); 4910 loff_t range_end = range_start + folio_size(folio); 4911 struct writeback_control wbc = { 4912 .sync_mode = WB_SYNC_ALL, 4913 .nr_to_write = 0, 4914 .range_start = range_start, 4915 .range_end = range_end, 4916 }; 4917 4918 cifs_dbg(FYI, "Launder page: %lu\n", folio->index); 4919 4920 if (folio_clear_dirty_for_io(folio)) 4921 rc = cifs_writepage_locked(&folio->page, &wbc); 4922 4923 folio_wait_fscache(folio); 4924 return rc; 4925 } 4926 4927 void cifs_oplock_break(struct work_struct *work) 4928 { 4929 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 4930 oplock_break); 4931 struct inode *inode = d_inode(cfile->dentry); 4932 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 4933 struct cifsInodeInfo *cinode = CIFS_I(inode); 4934 struct cifs_tcon *tcon; 4935 struct TCP_Server_Info *server; 4936 struct tcon_link *tlink; 4937 int rc = 0; 4938 bool purge_cache = false, oplock_break_cancelled; 4939 __u64 persistent_fid, volatile_fid; 4940 __u16 net_fid; 4941 4942 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, 4943 TASK_UNINTERRUPTIBLE); 4944 4945 tlink = 
cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink))
		goto out;
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
	    cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * When an oplock break is received and there are no active file
	 * handles, only cached ones, schedule the deferred close immediately
	 * so that a new open will not use the cached handle.
	 */

	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
		cifs_close_deferred_file(cinode);

	persistent_fid = cfile->fid.persistent_fid;
	volatile_fid = cfile->fid.volatile_fid;
	net_fid = cfile->fid.netfid;
	oplock_break_cancelled = cfile->oplock_break_cancelled;

	_cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
	/*
	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
	 * an acknowledgment to be sent when the file has already been closed.
	 */
	spin_lock(&cinode->open_file_lock);
	/* check list empty since can race with kill_sb calling tree disconnect */
	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
		spin_unlock(&cinode->open_file_lock);
		rc = server->ops->oplock_response(tcon, persistent_fid,
						  volatile_fid, net_fid, cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	} else
		spin_unlock(&cinode->open_file_lock);

	cifs_put_tlink(tlink);
out:
	cifs_done_oplock_break(cinode);
}

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with the O_DIRECT flag, which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
 * so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
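 * (cifs_direct_io() below therefore simply returns -EINVAL.)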
5024 */ 5025 static ssize_t 5026 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) 5027 { 5028 /* 5029 * FIXME 5030 * Eventually need to support direct IO for non forcedirectio mounts 5031 */ 5032 return -EINVAL; 5033 } 5034 5035 static int cifs_swap_activate(struct swap_info_struct *sis, 5036 struct file *swap_file, sector_t *span) 5037 { 5038 struct cifsFileInfo *cfile = swap_file->private_data; 5039 struct inode *inode = swap_file->f_mapping->host; 5040 unsigned long blocks; 5041 long long isize; 5042 5043 cifs_dbg(FYI, "swap activate\n"); 5044 5045 if (!swap_file->f_mapping->a_ops->swap_rw) 5046 /* Cannot support swap */ 5047 return -EINVAL; 5048 5049 spin_lock(&inode->i_lock); 5050 blocks = inode->i_blocks; 5051 isize = inode->i_size; 5052 spin_unlock(&inode->i_lock); 5053 if (blocks*512 < isize) { 5054 pr_warn("swap activate: swapfile has holes\n"); 5055 return -EINVAL; 5056 } 5057 *span = sis->pages; 5058 5059 pr_warn_once("Swap support over SMB3 is experimental\n"); 5060 5061 /* 5062 * TODO: consider adding ACL (or documenting how) to prevent other 5063 * users (on this or other systems) from reading it 5064 */ 5065 5066 5067 /* TODO: add sk_set_memalloc(inet) or similar */ 5068 5069 if (cfile) 5070 cfile->swapfile = true; 5071 /* 5072 * TODO: Since file already open, we can't open with DENY_ALL here 5073 * but we could add call to grab a byte range lock to prevent others 5074 * from reading or writing the file 5075 */ 5076 5077 sis->flags |= SWP_FS_OPS; 5078 return add_swap_extent(sis, 0, sis->max, 0); 5079 } 5080 5081 static void cifs_swap_deactivate(struct file *file) 5082 { 5083 struct cifsFileInfo *cfile = file->private_data; 5084 5085 cifs_dbg(FYI, "swap deactivate\n"); 5086 5087 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ 5088 5089 if (cfile) 5090 cfile->swapfile = false; 5091 5092 /* do we need to unpin (or unlock) the file */ 5093 } 5094 5095 const struct address_space_operations cifs_addr_ops = { 5096 .read_folio = cifs_read_folio, 5097 .readahead = cifs_readahead, 5098 .writepages = cifs_writepages, 5099 .write_begin = cifs_write_begin, 5100 .write_end = cifs_write_end, 5101 .dirty_folio = netfs_dirty_folio, 5102 .release_folio = cifs_release_folio, 5103 .direct_IO = cifs_direct_io, 5104 .invalidate_folio = cifs_invalidate_folio, 5105 .launder_folio = cifs_launder_folio, 5106 .migrate_folio = filemap_migrate_folio, 5107 /* 5108 * TODO: investigate and if useful we could add an is_dirty_writeback 5109 * helper if needed 5110 */ 5111 .swap_activate = cifs_swap_activate, 5112 .swap_deactivate = cifs_swap_deactivate, 5113 }; 5114 5115 /* 5116 * cifs_readahead requires the server to support a buffer large enough to 5117 * contain the header plus one complete page of data. Otherwise, we need 5118 * to leave cifs_readahead out of the address space operations. 5119 */ 5120 const struct address_space_operations cifs_addr_ops_smallbuf = { 5121 .read_folio = cifs_read_folio, 5122 .writepages = cifs_writepages, 5123 .write_begin = cifs_write_begin, 5124 .write_end = cifs_write_end, 5125 .dirty_folio = netfs_dirty_folio, 5126 .release_folio = cifs_release_folio, 5127 .invalidate_folio = cifs_invalidate_folio, 5128 .launder_folio = cifs_launder_folio, 5129 .migrate_folio = filemap_migrate_folio, 5130 }; 5131