1 // SPDX-License-Identifier: LGPL-2.1 2 /* 3 * 4 * vfs operations that deal with files 5 * 6 * Copyright (C) International Business Machines Corp., 2002,2010 7 * Author(s): Steve French (sfrench@us.ibm.com) 8 * Jeremy Allison (jra@samba.org) 9 * 10 */ 11 #include <linux/fs.h> 12 #include <linux/filelock.h> 13 #include <linux/backing-dev.h> 14 #include <linux/stat.h> 15 #include <linux/fcntl.h> 16 #include <linux/pagemap.h> 17 #include <linux/pagevec.h> 18 #include <linux/writeback.h> 19 #include <linux/task_io_accounting_ops.h> 20 #include <linux/delay.h> 21 #include <linux/mount.h> 22 #include <linux/slab.h> 23 #include <linux/swap.h> 24 #include <linux/mm.h> 25 #include <asm/div64.h> 26 #include "cifsfs.h" 27 #include "cifspdu.h" 28 #include "cifsglob.h" 29 #include "cifsproto.h" 30 #include "smb2proto.h" 31 #include "cifs_unicode.h" 32 #include "cifs_debug.h" 33 #include "cifs_fs_sb.h" 34 #include "fscache.h" 35 #include "smbdirect.h" 36 #include "fs_context.h" 37 #include "cifs_ioctl.h" 38 #include "cached_dir.h" 39 40 /* 41 * Remove the dirty flags from a span of pages. 42 */ 43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len) 44 { 45 struct address_space *mapping = inode->i_mapping; 46 struct folio *folio; 47 pgoff_t end; 48 49 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 50 51 rcu_read_lock(); 52 53 end = (start + len - 1) / PAGE_SIZE; 54 xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) { 55 if (xas_retry(&xas, folio)) 56 continue; 57 xas_pause(&xas); 58 rcu_read_unlock(); 59 folio_lock(folio); 60 folio_clear_dirty_for_io(folio); 61 folio_unlock(folio); 62 rcu_read_lock(); 63 } 64 65 rcu_read_unlock(); 66 } 67 68 /* 69 * Completion of write to server. 70 */ 71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len) 72 { 73 struct address_space *mapping = inode->i_mapping; 74 struct folio *folio; 75 pgoff_t end; 76 77 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 78 79 if (!len) 80 return; 81 82 rcu_read_lock(); 83 84 end = (start + len - 1) / PAGE_SIZE; 85 xas_for_each(&xas, folio, end) { 86 if (xas_retry(&xas, folio)) 87 continue; 88 if (!folio_test_writeback(folio)) { 89 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 90 len, start, folio->index, end); 91 continue; 92 } 93 94 folio_detach_private(folio); 95 folio_end_writeback(folio); 96 } 97 98 rcu_read_unlock(); 99 } 100 101 /* 102 * Failure of write to server. 103 */ 104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len) 105 { 106 struct address_space *mapping = inode->i_mapping; 107 struct folio *folio; 108 pgoff_t end; 109 110 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 111 112 if (!len) 113 return; 114 115 rcu_read_lock(); 116 117 end = (start + len - 1) / PAGE_SIZE; 118 xas_for_each(&xas, folio, end) { 119 if (xas_retry(&xas, folio)) 120 continue; 121 if (!folio_test_writeback(folio)) { 122 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 123 len, start, folio->index, end); 124 continue; 125 } 126 127 folio_set_error(folio); 128 folio_end_writeback(folio); 129 } 130 131 rcu_read_unlock(); 132 } 133 134 /* 135 * Redirty pages after a temporary failure. 
136 */ 137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len) 138 { 139 struct address_space *mapping = inode->i_mapping; 140 struct folio *folio; 141 pgoff_t end; 142 143 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 144 145 if (!len) 146 return; 147 148 rcu_read_lock(); 149 150 end = (start + len - 1) / PAGE_SIZE; 151 xas_for_each(&xas, folio, end) { 152 if (!folio_test_writeback(folio)) { 153 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 154 len, start, folio->index, end); 155 continue; 156 } 157 158 filemap_dirty_folio(folio->mapping, folio); 159 folio_end_writeback(folio); 160 } 161 162 rcu_read_unlock(); 163 } 164 165 /* 166 * Mark as invalid, all open files on tree connections since they 167 * were closed when session to server was lost. 168 */ 169 void 170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon) 171 { 172 struct cifsFileInfo *open_file = NULL; 173 struct list_head *tmp; 174 struct list_head *tmp1; 175 176 /* only send once per connect */ 177 spin_lock(&tcon->tc_lock); 178 if (tcon->need_reconnect) 179 tcon->status = TID_NEED_RECON; 180 181 if (tcon->status != TID_NEED_RECON) { 182 spin_unlock(&tcon->tc_lock); 183 return; 184 } 185 tcon->status = TID_IN_FILES_INVALIDATE; 186 spin_unlock(&tcon->tc_lock); 187 188 /* list all files open on tree connection and mark them invalid */ 189 spin_lock(&tcon->open_file_lock); 190 list_for_each_safe(tmp, tmp1, &tcon->openFileList) { 191 open_file = list_entry(tmp, struct cifsFileInfo, tlist); 192 open_file->invalidHandle = true; 193 open_file->oplock_break_cancelled = true; 194 } 195 spin_unlock(&tcon->open_file_lock); 196 197 invalidate_all_cached_dirs(tcon); 198 spin_lock(&tcon->tc_lock); 199 if (tcon->status == TID_IN_FILES_INVALIDATE) 200 tcon->status = TID_NEED_TCON; 201 spin_unlock(&tcon->tc_lock); 202 203 /* 204 * BB Add call to invalidate_inodes(sb) for all superblocks mounted 205 * to this tcon. 206 */ 207 } 208 209 static inline int cifs_convert_flags(unsigned int flags) 210 { 211 if ((flags & O_ACCMODE) == O_RDONLY) 212 return GENERIC_READ; 213 else if ((flags & O_ACCMODE) == O_WRONLY) 214 return GENERIC_WRITE; 215 else if ((flags & O_ACCMODE) == O_RDWR) { 216 /* GENERIC_ALL is too much permission to request 217 can cause unnecessary access denied on create */ 218 /* return GENERIC_ALL; */ 219 return (GENERIC_READ | GENERIC_WRITE); 220 } 221 222 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES | 223 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA | 224 FILE_READ_DATA); 225 } 226 227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 228 static u32 cifs_posix_convert_flags(unsigned int flags) 229 { 230 u32 posix_flags = 0; 231 232 if ((flags & O_ACCMODE) == O_RDONLY) 233 posix_flags = SMB_O_RDONLY; 234 else if ((flags & O_ACCMODE) == O_WRONLY) 235 posix_flags = SMB_O_WRONLY; 236 else if ((flags & O_ACCMODE) == O_RDWR) 237 posix_flags = SMB_O_RDWR; 238 239 if (flags & O_CREAT) { 240 posix_flags |= SMB_O_CREAT; 241 if (flags & O_EXCL) 242 posix_flags |= SMB_O_EXCL; 243 } else if (flags & O_EXCL) 244 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. 
Ignoring O_EXCL\n", 245 current->comm, current->tgid); 246 247 if (flags & O_TRUNC) 248 posix_flags |= SMB_O_TRUNC; 249 /* be safe and imply O_SYNC for O_DSYNC */ 250 if (flags & O_DSYNC) 251 posix_flags |= SMB_O_SYNC; 252 if (flags & O_DIRECTORY) 253 posix_flags |= SMB_O_DIRECTORY; 254 if (flags & O_NOFOLLOW) 255 posix_flags |= SMB_O_NOFOLLOW; 256 if (flags & O_DIRECT) 257 posix_flags |= SMB_O_DIRECT; 258 259 return posix_flags; 260 } 261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 262 263 static inline int cifs_get_disposition(unsigned int flags) 264 { 265 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) 266 return FILE_CREATE; 267 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) 268 return FILE_OVERWRITE_IF; 269 else if ((flags & O_CREAT) == O_CREAT) 270 return FILE_OPEN_IF; 271 else if ((flags & O_TRUNC) == O_TRUNC) 272 return FILE_OVERWRITE; 273 else 274 return FILE_OPEN; 275 } 276 277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 278 int cifs_posix_open(const char *full_path, struct inode **pinode, 279 struct super_block *sb, int mode, unsigned int f_flags, 280 __u32 *poplock, __u16 *pnetfid, unsigned int xid) 281 { 282 int rc; 283 FILE_UNIX_BASIC_INFO *presp_data; 284 __u32 posix_flags = 0; 285 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 286 struct cifs_fattr fattr; 287 struct tcon_link *tlink; 288 struct cifs_tcon *tcon; 289 290 cifs_dbg(FYI, "posix open %s\n", full_path); 291 292 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); 293 if (presp_data == NULL) 294 return -ENOMEM; 295 296 tlink = cifs_sb_tlink(cifs_sb); 297 if (IS_ERR(tlink)) { 298 rc = PTR_ERR(tlink); 299 goto posix_open_ret; 300 } 301 302 tcon = tlink_tcon(tlink); 303 mode &= ~current_umask(); 304 305 posix_flags = cifs_posix_convert_flags(f_flags); 306 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data, 307 poplock, full_path, cifs_sb->local_nls, 308 cifs_remap(cifs_sb)); 309 cifs_put_tlink(tlink); 310 311 if (rc) 312 goto posix_open_ret; 313 314 if (presp_data->Type == cpu_to_le32(-1)) 315 goto posix_open_ret; /* open ok, caller does qpathinfo */ 316 317 if (!pinode) 318 goto posix_open_ret; /* caller does not need info */ 319 320 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb); 321 322 /* get new inode and set it up */ 323 if (*pinode == NULL) { 324 cifs_fill_uniqueid(sb, &fattr); 325 *pinode = cifs_iget(sb, &fattr); 326 if (!*pinode) { 327 rc = -ENOMEM; 328 goto posix_open_ret; 329 } 330 } else { 331 cifs_revalidate_mapping(*pinode); 332 rc = cifs_fattr_to_inode(*pinode, &fattr); 333 } 334 335 posix_open_ret: 336 kfree(presp_data); 337 return rc; 338 } 339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 340 341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, 342 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock, 343 struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf) 344 { 345 int rc; 346 int desired_access; 347 int disposition; 348 int create_options = CREATE_NOT_DIR; 349 struct TCP_Server_Info *server = tcon->ses->server; 350 struct cifs_open_parms oparms; 351 352 if (!server->ops->open) 353 return -ENOSYS; 354 355 desired_access = cifs_convert_flags(f_flags); 356 357 /********************************************************************* 358 * open flag mapping table: 359 * 360 * POSIX Flag CIFS Disposition 361 * ---------- ---------------- 362 * O_CREAT FILE_OPEN_IF 363 * O_CREAT | O_EXCL FILE_CREATE 364 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF 365 * O_TRUNC FILE_OVERWRITE 366 
* none of the above FILE_OPEN 367 * 368 * Note that there is not a direct match between disposition 369 * FILE_SUPERSEDE (ie create whether or not file exists although 370 * O_CREAT | O_TRUNC is similar but truncates the existing 371 * file rather than creating a new file as FILE_SUPERSEDE does 372 * (which uses the attributes / metadata passed in on open call) 373 *? 374 *? O_SYNC is a reasonable match to CIFS writethrough flag 375 *? and the read write flags match reasonably. O_LARGEFILE 376 *? is irrelevant because largefile support is always used 377 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, 378 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation 379 *********************************************************************/ 380 381 disposition = cifs_get_disposition(f_flags); 382 383 /* BB pass O_SYNC flag through on file attributes .. BB */ 384 385 /* O_SYNC also has bit for O_DSYNC so following check picks up either */ 386 if (f_flags & O_SYNC) 387 create_options |= CREATE_WRITE_THROUGH; 388 389 if (f_flags & O_DIRECT) 390 create_options |= CREATE_NO_BUFFER; 391 392 oparms = (struct cifs_open_parms) { 393 .tcon = tcon, 394 .cifs_sb = cifs_sb, 395 .desired_access = desired_access, 396 .create_options = cifs_create_options(cifs_sb, create_options), 397 .disposition = disposition, 398 .path = full_path, 399 .fid = fid, 400 }; 401 402 rc = server->ops->open(xid, &oparms, oplock, buf); 403 if (rc) 404 return rc; 405 406 /* TODO: Add support for calling posix query info but with passing in fid */ 407 if (tcon->unix_ext) 408 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, 409 xid); 410 else 411 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, 412 xid, fid); 413 414 if (rc) { 415 server->ops->close(xid, tcon, fid); 416 if (rc == -ESTALE) 417 rc = -EOPENSTALE; 418 } 419 420 return rc; 421 } 422 423 static bool 424 cifs_has_mand_locks(struct cifsInodeInfo *cinode) 425 { 426 struct cifs_fid_locks *cur; 427 bool has_locks = false; 428 429 down_read(&cinode->lock_sem); 430 list_for_each_entry(cur, &cinode->llist, llist) { 431 if (!list_empty(&cur->locks)) { 432 has_locks = true; 433 break; 434 } 435 } 436 up_read(&cinode->lock_sem); 437 return has_locks; 438 } 439 440 void 441 cifs_down_write(struct rw_semaphore *sem) 442 { 443 while (!down_write_trylock(sem)) 444 msleep(10); 445 } 446 447 static void cifsFileInfo_put_work(struct work_struct *work); 448 449 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, 450 struct tcon_link *tlink, __u32 oplock, 451 const char *symlink_target) 452 { 453 struct dentry *dentry = file_dentry(file); 454 struct inode *inode = d_inode(dentry); 455 struct cifsInodeInfo *cinode = CIFS_I(inode); 456 struct cifsFileInfo *cfile; 457 struct cifs_fid_locks *fdlocks; 458 struct cifs_tcon *tcon = tlink_tcon(tlink); 459 struct TCP_Server_Info *server = tcon->ses->server; 460 461 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 462 if (cfile == NULL) 463 return cfile; 464 465 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL); 466 if (!fdlocks) { 467 kfree(cfile); 468 return NULL; 469 } 470 471 if (symlink_target) { 472 cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL); 473 if (!cfile->symlink_target) { 474 kfree(fdlocks); 475 kfree(cfile); 476 return NULL; 477 } 478 } 479 480 INIT_LIST_HEAD(&fdlocks->locks); 481 fdlocks->cfile = cfile; 482 cfile->llist = fdlocks; 483 484 cfile->count = 1; 485 cfile->pid = current->tgid; 486 cfile->uid = current_fsuid(); 487 
cfile->dentry = dget(dentry); 488 cfile->f_flags = file->f_flags; 489 cfile->invalidHandle = false; 490 cfile->deferred_close_scheduled = false; 491 cfile->tlink = cifs_get_tlink(tlink); 492 INIT_WORK(&cfile->oplock_break, cifs_oplock_break); 493 INIT_WORK(&cfile->put, cifsFileInfo_put_work); 494 INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); 495 mutex_init(&cfile->fh_mutex); 496 spin_lock_init(&cfile->file_info_lock); 497 498 cifs_sb_active(inode->i_sb); 499 500 /* 501 * If the server returned a read oplock and we have mandatory brlocks, 502 * set oplock level to None. 503 */ 504 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 505 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 506 oplock = 0; 507 } 508 509 cifs_down_write(&cinode->lock_sem); 510 list_add(&fdlocks->llist, &cinode->llist); 511 up_write(&cinode->lock_sem); 512 513 spin_lock(&tcon->open_file_lock); 514 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) 515 oplock = fid->pending_open->oplock; 516 list_del(&fid->pending_open->olist); 517 518 fid->purge_cache = false; 519 server->ops->set_fid(cfile, fid, oplock); 520 521 list_add(&cfile->tlist, &tcon->openFileList); 522 atomic_inc(&tcon->num_local_opens); 523 524 /* if readable file instance put first in list*/ 525 spin_lock(&cinode->open_file_lock); 526 if (file->f_mode & FMODE_READ) 527 list_add(&cfile->flist, &cinode->openFileList); 528 else 529 list_add_tail(&cfile->flist, &cinode->openFileList); 530 spin_unlock(&cinode->open_file_lock); 531 spin_unlock(&tcon->open_file_lock); 532 533 if (fid->purge_cache) 534 cifs_zap_mapping(inode); 535 536 file->private_data = cfile; 537 return cfile; 538 } 539 540 struct cifsFileInfo * 541 cifsFileInfo_get(struct cifsFileInfo *cifs_file) 542 { 543 spin_lock(&cifs_file->file_info_lock); 544 cifsFileInfo_get_locked(cifs_file); 545 spin_unlock(&cifs_file->file_info_lock); 546 return cifs_file; 547 } 548 549 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) 550 { 551 struct inode *inode = d_inode(cifs_file->dentry); 552 struct cifsInodeInfo *cifsi = CIFS_I(inode); 553 struct cifsLockInfo *li, *tmp; 554 struct super_block *sb = inode->i_sb; 555 556 /* 557 * Delete any outstanding lock records. We'll lose them when the file 558 * is closed anyway. 559 */ 560 cifs_down_write(&cifsi->lock_sem); 561 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { 562 list_del(&li->llist); 563 cifs_del_lock_waiters(li); 564 kfree(li); 565 } 566 list_del(&cifs_file->llist->llist); 567 kfree(cifs_file->llist); 568 up_write(&cifsi->lock_sem); 569 570 cifs_put_tlink(cifs_file->tlink); 571 dput(cifs_file->dentry); 572 cifs_sb_deactive(sb); 573 kfree(cifs_file->symlink_target); 574 kfree(cifs_file); 575 } 576 577 static void cifsFileInfo_put_work(struct work_struct *work) 578 { 579 struct cifsFileInfo *cifs_file = container_of(work, 580 struct cifsFileInfo, put); 581 582 cifsFileInfo_put_final(cifs_file); 583 } 584 585 /** 586 * cifsFileInfo_put - release a reference of file priv data 587 * 588 * Always potentially wait for oplock handler. See _cifsFileInfo_put(). 589 * 590 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file 591 */ 592 void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 593 { 594 _cifsFileInfo_put(cifs_file, true, true); 595 } 596 597 /** 598 * _cifsFileInfo_put - release a reference of file priv data 599 * 600 * This may involve closing the filehandle @cifs_file out on the 601 * server. 
Must be called without holding tcon->open_file_lock, 602 * cinode->open_file_lock and cifs_file->file_info_lock. 603 * 604 * If @wait_for_oplock_handler is true and we are releasing the last 605 * reference, wait for any running oplock break handler of the file 606 * and cancel any pending one. 607 * 608 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file 609 * @wait_oplock_handler: must be false if called from oplock_break_handler 610 * @offload: not offloaded on close and oplock breaks 611 * 612 */ 613 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, 614 bool wait_oplock_handler, bool offload) 615 { 616 struct inode *inode = d_inode(cifs_file->dentry); 617 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); 618 struct TCP_Server_Info *server = tcon->ses->server; 619 struct cifsInodeInfo *cifsi = CIFS_I(inode); 620 struct super_block *sb = inode->i_sb; 621 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 622 struct cifs_fid fid = {}; 623 struct cifs_pending_open open; 624 bool oplock_break_cancelled; 625 626 spin_lock(&tcon->open_file_lock); 627 spin_lock(&cifsi->open_file_lock); 628 spin_lock(&cifs_file->file_info_lock); 629 if (--cifs_file->count > 0) { 630 spin_unlock(&cifs_file->file_info_lock); 631 spin_unlock(&cifsi->open_file_lock); 632 spin_unlock(&tcon->open_file_lock); 633 return; 634 } 635 spin_unlock(&cifs_file->file_info_lock); 636 637 if (server->ops->get_lease_key) 638 server->ops->get_lease_key(inode, &fid); 639 640 /* store open in pending opens to make sure we don't miss lease break */ 641 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); 642 643 /* remove it from the lists */ 644 list_del(&cifs_file->flist); 645 list_del(&cifs_file->tlist); 646 atomic_dec(&tcon->num_local_opens); 647 648 if (list_empty(&cifsi->openFileList)) { 649 cifs_dbg(FYI, "closing last open instance for inode %p\n", 650 d_inode(cifs_file->dentry)); 651 /* 652 * In strict cache mode we need invalidate mapping on the last 653 * close because it may cause a error when we open this file 654 * again and get at least level II oplock. 655 */ 656 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) 657 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags); 658 cifs_set_oplock_level(cifsi, 0); 659 } 660 661 spin_unlock(&cifsi->open_file_lock); 662 spin_unlock(&tcon->open_file_lock); 663 664 oplock_break_cancelled = wait_oplock_handler ? 
665 cancel_work_sync(&cifs_file->oplock_break) : false; 666 667 if (!tcon->need_reconnect && !cifs_file->invalidHandle) { 668 struct TCP_Server_Info *server = tcon->ses->server; 669 unsigned int xid; 670 671 xid = get_xid(); 672 if (server->ops->close_getattr) 673 server->ops->close_getattr(xid, tcon, cifs_file); 674 else if (server->ops->close) 675 server->ops->close(xid, tcon, &cifs_file->fid); 676 _free_xid(xid); 677 } 678 679 if (oplock_break_cancelled) 680 cifs_done_oplock_break(cifsi); 681 682 cifs_del_pending_open(&open); 683 684 if (offload) 685 queue_work(fileinfo_put_wq, &cifs_file->put); 686 else 687 cifsFileInfo_put_final(cifs_file); 688 } 689 690 int cifs_open(struct inode *inode, struct file *file) 691 692 { 693 int rc = -EACCES; 694 unsigned int xid; 695 __u32 oplock; 696 struct cifs_sb_info *cifs_sb; 697 struct TCP_Server_Info *server; 698 struct cifs_tcon *tcon; 699 struct tcon_link *tlink; 700 struct cifsFileInfo *cfile = NULL; 701 void *page; 702 const char *full_path; 703 bool posix_open_ok = false; 704 struct cifs_fid fid = {}; 705 struct cifs_pending_open open; 706 struct cifs_open_info_data data = {}; 707 708 xid = get_xid(); 709 710 cifs_sb = CIFS_SB(inode->i_sb); 711 if (unlikely(cifs_forced_shutdown(cifs_sb))) { 712 free_xid(xid); 713 return -EIO; 714 } 715 716 tlink = cifs_sb_tlink(cifs_sb); 717 if (IS_ERR(tlink)) { 718 free_xid(xid); 719 return PTR_ERR(tlink); 720 } 721 tcon = tlink_tcon(tlink); 722 server = tcon->ses->server; 723 724 page = alloc_dentry_path(); 725 full_path = build_path_from_dentry(file_dentry(file), page); 726 if (IS_ERR(full_path)) { 727 rc = PTR_ERR(full_path); 728 goto out; 729 } 730 731 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", 732 inode, file->f_flags, full_path); 733 734 if (file->f_flags & O_DIRECT && 735 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { 736 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 737 file->f_op = &cifs_file_direct_nobrl_ops; 738 else 739 file->f_op = &cifs_file_direct_ops; 740 } 741 742 /* Get the cached handle as SMB2 close is deferred */ 743 rc = cifs_get_readable_path(tcon, full_path, &cfile); 744 if (rc == 0) { 745 if (file->f_flags == cfile->f_flags) { 746 file->private_data = cfile; 747 spin_lock(&CIFS_I(inode)->deferred_lock); 748 cifs_del_deferred_close(cfile); 749 spin_unlock(&CIFS_I(inode)->deferred_lock); 750 goto use_cache; 751 } else { 752 _cifsFileInfo_put(cfile, true, false); 753 } 754 } 755 756 if (server->oplocks) 757 oplock = REQ_OPLOCK; 758 else 759 oplock = 0; 760 761 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 762 if (!tcon->broken_posix_open && tcon->unix_ext && 763 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & 764 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 765 /* can not refresh inode info since size could be stale */ 766 rc = cifs_posix_open(full_path, &inode, inode->i_sb, 767 cifs_sb->ctx->file_mode /* ignored */, 768 file->f_flags, &oplock, &fid.netfid, xid); 769 if (rc == 0) { 770 cifs_dbg(FYI, "posix open succeeded\n"); 771 posix_open_ok = true; 772 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 773 if (tcon->ses->serverNOS) 774 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. 
Check if server update available.\n", 775 tcon->ses->ip_addr, 776 tcon->ses->serverNOS); 777 tcon->broken_posix_open = true; 778 } else if ((rc != -EIO) && (rc != -EREMOTE) && 779 (rc != -EOPNOTSUPP)) /* path not found or net err */ 780 goto out; 781 /* 782 * Else fallthrough to retry open the old way on network i/o 783 * or DFS errors. 784 */ 785 } 786 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 787 788 if (server->ops->get_lease_key) 789 server->ops->get_lease_key(inode, &fid); 790 791 cifs_add_pending_open(&fid, tlink, &open); 792 793 if (!posix_open_ok) { 794 if (server->ops->get_lease_key) 795 server->ops->get_lease_key(inode, &fid); 796 797 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid, 798 xid, &data); 799 if (rc) { 800 cifs_del_pending_open(&open); 801 goto out; 802 } 803 } 804 805 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target); 806 if (cfile == NULL) { 807 if (server->ops->close) 808 server->ops->close(xid, tcon, &fid); 809 cifs_del_pending_open(&open); 810 rc = -ENOMEM; 811 goto out; 812 } 813 814 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 815 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { 816 /* 817 * Time to set mode which we can not set earlier due to 818 * problems creating new read-only files. 819 */ 820 struct cifs_unix_set_info_args args = { 821 .mode = inode->i_mode, 822 .uid = INVALID_UID, /* no change */ 823 .gid = INVALID_GID, /* no change */ 824 .ctime = NO_CHANGE_64, 825 .atime = NO_CHANGE_64, 826 .mtime = NO_CHANGE_64, 827 .device = 0, 828 }; 829 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid, 830 cfile->pid); 831 } 832 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 833 834 use_cache: 835 fscache_use_cookie(cifs_inode_cookie(file_inode(file)), 836 file->f_mode & FMODE_WRITE); 837 if (file->f_flags & O_DIRECT && 838 (!((file->f_flags & O_ACCMODE) != O_RDONLY) || 839 file->f_flags & O_APPEND)) 840 cifs_invalidate_cache(file_inode(file), 841 FSCACHE_INVAL_DIO_WRITE); 842 843 out: 844 free_dentry_path(page); 845 free_xid(xid); 846 cifs_put_tlink(tlink); 847 cifs_free_open_info(&data); 848 return rc; 849 } 850 851 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 852 static int cifs_push_posix_locks(struct cifsFileInfo *cfile); 853 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 854 855 /* 856 * Try to reacquire byte range locks that were released when session 857 * to server was lost. 
858 */ 859 static int 860 cifs_relock_file(struct cifsFileInfo *cfile) 861 { 862 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 863 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 864 int rc = 0; 865 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 866 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 867 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 868 869 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); 870 if (cinode->can_cache_brlcks) { 871 /* can cache locks - no need to relock */ 872 up_read(&cinode->lock_sem); 873 return rc; 874 } 875 876 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 877 if (cap_unix(tcon->ses) && 878 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 879 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 880 rc = cifs_push_posix_locks(cfile); 881 else 882 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 883 rc = tcon->ses->server->ops->push_mand_locks(cfile); 884 885 up_read(&cinode->lock_sem); 886 return rc; 887 } 888 889 static int 890 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) 891 { 892 int rc = -EACCES; 893 unsigned int xid; 894 __u32 oplock; 895 struct cifs_sb_info *cifs_sb; 896 struct cifs_tcon *tcon; 897 struct TCP_Server_Info *server; 898 struct cifsInodeInfo *cinode; 899 struct inode *inode; 900 void *page; 901 const char *full_path; 902 int desired_access; 903 int disposition = FILE_OPEN; 904 int create_options = CREATE_NOT_DIR; 905 struct cifs_open_parms oparms; 906 907 xid = get_xid(); 908 mutex_lock(&cfile->fh_mutex); 909 if (!cfile->invalidHandle) { 910 mutex_unlock(&cfile->fh_mutex); 911 free_xid(xid); 912 return 0; 913 } 914 915 inode = d_inode(cfile->dentry); 916 cifs_sb = CIFS_SB(inode->i_sb); 917 tcon = tlink_tcon(cfile->tlink); 918 server = tcon->ses->server; 919 920 /* 921 * Can not grab rename sem here because various ops, including those 922 * that already have the rename sem can end up causing writepage to get 923 * called and if the server was down that means we end up here, and we 924 * can never tell if the caller already has the rename_sem. 925 */ 926 page = alloc_dentry_path(); 927 full_path = build_path_from_dentry(cfile->dentry, page); 928 if (IS_ERR(full_path)) { 929 mutex_unlock(&cfile->fh_mutex); 930 free_dentry_path(page); 931 free_xid(xid); 932 return PTR_ERR(full_path); 933 } 934 935 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n", 936 inode, cfile->f_flags, full_path); 937 938 if (tcon->ses->server->oplocks) 939 oplock = REQ_OPLOCK; 940 else 941 oplock = 0; 942 943 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 944 if (tcon->unix_ext && cap_unix(tcon->ses) && 945 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 946 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 947 /* 948 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the 949 * original open. Must mask them off for a reopen. 
950 */ 951 unsigned int oflags = cfile->f_flags & 952 ~(O_CREAT | O_EXCL | O_TRUNC); 953 954 rc = cifs_posix_open(full_path, NULL, inode->i_sb, 955 cifs_sb->ctx->file_mode /* ignored */, 956 oflags, &oplock, &cfile->fid.netfid, xid); 957 if (rc == 0) { 958 cifs_dbg(FYI, "posix reopen succeeded\n"); 959 oparms.reconnect = true; 960 goto reopen_success; 961 } 962 /* 963 * fallthrough to retry open the old way on errors, especially 964 * in the reconnect path it is important to retry hard 965 */ 966 } 967 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 968 969 desired_access = cifs_convert_flags(cfile->f_flags); 970 971 /* O_SYNC also has bit for O_DSYNC so following check picks up either */ 972 if (cfile->f_flags & O_SYNC) 973 create_options |= CREATE_WRITE_THROUGH; 974 975 if (cfile->f_flags & O_DIRECT) 976 create_options |= CREATE_NO_BUFFER; 977 978 if (server->ops->get_lease_key) 979 server->ops->get_lease_key(inode, &cfile->fid); 980 981 oparms = (struct cifs_open_parms) { 982 .tcon = tcon, 983 .cifs_sb = cifs_sb, 984 .desired_access = desired_access, 985 .create_options = cifs_create_options(cifs_sb, create_options), 986 .disposition = disposition, 987 .path = full_path, 988 .fid = &cfile->fid, 989 .reconnect = true, 990 }; 991 992 /* 993 * Can not refresh inode by passing in file_info buf to be returned by 994 * ops->open and then calling get_inode_info with returned buf since 995 * file might have write behind data that needs to be flushed and server 996 * version of file size can be stale. If we knew for sure that inode was 997 * not dirty locally we could do this. 998 */ 999 rc = server->ops->open(xid, &oparms, &oplock, NULL); 1000 if (rc == -ENOENT && oparms.reconnect == false) { 1001 /* durable handle timeout is expired - open the file again */ 1002 rc = server->ops->open(xid, &oparms, &oplock, NULL); 1003 /* indicate that we need to relock the file */ 1004 oparms.reconnect = true; 1005 } 1006 1007 if (rc) { 1008 mutex_unlock(&cfile->fh_mutex); 1009 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); 1010 cifs_dbg(FYI, "oplock: %d\n", oplock); 1011 goto reopen_error_exit; 1012 } 1013 1014 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1015 reopen_success: 1016 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1017 cfile->invalidHandle = false; 1018 mutex_unlock(&cfile->fh_mutex); 1019 cinode = CIFS_I(inode); 1020 1021 if (can_flush) { 1022 rc = filemap_write_and_wait(inode->i_mapping); 1023 if (!is_interrupt_error(rc)) 1024 mapping_set_error(inode->i_mapping, rc); 1025 1026 if (tcon->posix_extensions) { 1027 rc = smb311_posix_get_inode_info(&inode, full_path, 1028 NULL, inode->i_sb, xid); 1029 } else if (tcon->unix_ext) { 1030 rc = cifs_get_inode_info_unix(&inode, full_path, 1031 inode->i_sb, xid); 1032 } else { 1033 rc = cifs_get_inode_info(&inode, full_path, NULL, 1034 inode->i_sb, xid, NULL); 1035 } 1036 } 1037 /* 1038 * Else we are writing out data to server already and could deadlock if 1039 * we tried to flush data, and since we do not know if we have data that 1040 * would invalidate the current end of file on the server we can not go 1041 * to the server to get the new inode info. 1042 */ 1043 1044 /* 1045 * If the server returned a read oplock and we have mandatory brlocks, 1046 * set oplock level to None. 
1047 */ 1048 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 1049 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 1050 oplock = 0; 1051 } 1052 1053 server->ops->set_fid(cfile, &cfile->fid, oplock); 1054 if (oparms.reconnect) 1055 cifs_relock_file(cfile); 1056 1057 reopen_error_exit: 1058 free_dentry_path(page); 1059 free_xid(xid); 1060 return rc; 1061 } 1062 1063 void smb2_deferred_work_close(struct work_struct *work) 1064 { 1065 struct cifsFileInfo *cfile = container_of(work, 1066 struct cifsFileInfo, deferred.work); 1067 1068 spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1069 cifs_del_deferred_close(cfile); 1070 cfile->deferred_close_scheduled = false; 1071 spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1072 _cifsFileInfo_put(cfile, true, false); 1073 } 1074 1075 int cifs_close(struct inode *inode, struct file *file) 1076 { 1077 struct cifsFileInfo *cfile; 1078 struct cifsInodeInfo *cinode = CIFS_I(inode); 1079 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1080 struct cifs_deferred_close *dclose; 1081 1082 cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE); 1083 1084 if (file->private_data != NULL) { 1085 cfile = file->private_data; 1086 file->private_data = NULL; 1087 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); 1088 if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG) 1089 && cinode->lease_granted && 1090 !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && 1091 dclose) { 1092 if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { 1093 inode_set_mtime_to_ts(inode, 1094 inode_set_ctime_current(inode)); 1095 } 1096 spin_lock(&cinode->deferred_lock); 1097 cifs_add_deferred_close(cfile, dclose); 1098 if (cfile->deferred_close_scheduled && 1099 delayed_work_pending(&cfile->deferred)) { 1100 /* 1101 * If there is no pending work, mod_delayed_work queues new work. 1102 * So, Increase the ref count to avoid use-after-free. 
1103 */ 1104 if (!mod_delayed_work(deferredclose_wq, 1105 &cfile->deferred, cifs_sb->ctx->closetimeo)) 1106 cifsFileInfo_get(cfile); 1107 } else { 1108 /* Deferred close for files */ 1109 queue_delayed_work(deferredclose_wq, 1110 &cfile->deferred, cifs_sb->ctx->closetimeo); 1111 cfile->deferred_close_scheduled = true; 1112 spin_unlock(&cinode->deferred_lock); 1113 return 0; 1114 } 1115 spin_unlock(&cinode->deferred_lock); 1116 _cifsFileInfo_put(cfile, true, false); 1117 } else { 1118 _cifsFileInfo_put(cfile, true, false); 1119 kfree(dclose); 1120 } 1121 } 1122 1123 /* return code from the ->release op is always ignored */ 1124 return 0; 1125 } 1126 1127 void 1128 cifs_reopen_persistent_handles(struct cifs_tcon *tcon) 1129 { 1130 struct cifsFileInfo *open_file, *tmp; 1131 struct list_head tmp_list; 1132 1133 if (!tcon->use_persistent || !tcon->need_reopen_files) 1134 return; 1135 1136 tcon->need_reopen_files = false; 1137 1138 cifs_dbg(FYI, "Reopen persistent handles\n"); 1139 INIT_LIST_HEAD(&tmp_list); 1140 1141 /* list all files open on tree connection, reopen resilient handles */ 1142 spin_lock(&tcon->open_file_lock); 1143 list_for_each_entry(open_file, &tcon->openFileList, tlist) { 1144 if (!open_file->invalidHandle) 1145 continue; 1146 cifsFileInfo_get(open_file); 1147 list_add_tail(&open_file->rlist, &tmp_list); 1148 } 1149 spin_unlock(&tcon->open_file_lock); 1150 1151 list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) { 1152 if (cifs_reopen_file(open_file, false /* do not flush */)) 1153 tcon->need_reopen_files = true; 1154 list_del_init(&open_file->rlist); 1155 cifsFileInfo_put(open_file); 1156 } 1157 } 1158 1159 int cifs_closedir(struct inode *inode, struct file *file) 1160 { 1161 int rc = 0; 1162 unsigned int xid; 1163 struct cifsFileInfo *cfile = file->private_data; 1164 struct cifs_tcon *tcon; 1165 struct TCP_Server_Info *server; 1166 char *buf; 1167 1168 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode); 1169 1170 if (cfile == NULL) 1171 return rc; 1172 1173 xid = get_xid(); 1174 tcon = tlink_tcon(cfile->tlink); 1175 server = tcon->ses->server; 1176 1177 cifs_dbg(FYI, "Freeing private data in close dir\n"); 1178 spin_lock(&cfile->file_info_lock); 1179 if (server->ops->dir_needs_close(cfile)) { 1180 cfile->invalidHandle = true; 1181 spin_unlock(&cfile->file_info_lock); 1182 if (server->ops->close_dir) 1183 rc = server->ops->close_dir(xid, tcon, &cfile->fid); 1184 else 1185 rc = -ENOSYS; 1186 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc); 1187 /* not much we can do if it fails anyway, ignore rc */ 1188 rc = 0; 1189 } else 1190 spin_unlock(&cfile->file_info_lock); 1191 1192 buf = cfile->srch_inf.ntwrk_buf_start; 1193 if (buf) { 1194 cifs_dbg(FYI, "closedir free smb buf in srch struct\n"); 1195 cfile->srch_inf.ntwrk_buf_start = NULL; 1196 if (cfile->srch_inf.smallBuf) 1197 cifs_small_buf_release(buf); 1198 else 1199 cifs_buf_release(buf); 1200 } 1201 1202 cifs_put_tlink(cfile->tlink); 1203 kfree(file->private_data); 1204 file->private_data = NULL; 1205 /* BB can we lock the filestruct while this is going on? 
*/ 1206 free_xid(xid); 1207 return rc; 1208 } 1209 1210 static struct cifsLockInfo * 1211 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags) 1212 { 1213 struct cifsLockInfo *lock = 1214 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); 1215 if (!lock) 1216 return lock; 1217 lock->offset = offset; 1218 lock->length = length; 1219 lock->type = type; 1220 lock->pid = current->tgid; 1221 lock->flags = flags; 1222 INIT_LIST_HEAD(&lock->blist); 1223 init_waitqueue_head(&lock->block_q); 1224 return lock; 1225 } 1226 1227 void 1228 cifs_del_lock_waiters(struct cifsLockInfo *lock) 1229 { 1230 struct cifsLockInfo *li, *tmp; 1231 list_for_each_entry_safe(li, tmp, &lock->blist, blist) { 1232 list_del_init(&li->blist); 1233 wake_up(&li->block_q); 1234 } 1235 } 1236 1237 #define CIFS_LOCK_OP 0 1238 #define CIFS_READ_OP 1 1239 #define CIFS_WRITE_OP 2 1240 1241 /* @rw_check : 0 - no op, 1 - read, 2 - write */ 1242 static bool 1243 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, 1244 __u64 length, __u8 type, __u16 flags, 1245 struct cifsFileInfo *cfile, 1246 struct cifsLockInfo **conf_lock, int rw_check) 1247 { 1248 struct cifsLockInfo *li; 1249 struct cifsFileInfo *cur_cfile = fdlocks->cfile; 1250 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 1251 1252 list_for_each_entry(li, &fdlocks->locks, llist) { 1253 if (offset + length <= li->offset || 1254 offset >= li->offset + li->length) 1255 continue; 1256 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid && 1257 server->ops->compare_fids(cfile, cur_cfile)) { 1258 /* shared lock prevents write op through the same fid */ 1259 if (!(li->type & server->vals->shared_lock_type) || 1260 rw_check != CIFS_WRITE_OP) 1261 continue; 1262 } 1263 if ((type & server->vals->shared_lock_type) && 1264 ((server->ops->compare_fids(cfile, cur_cfile) && 1265 current->tgid == li->pid) || type == li->type)) 1266 continue; 1267 if (rw_check == CIFS_LOCK_OP && 1268 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) && 1269 server->ops->compare_fids(cfile, cur_cfile)) 1270 continue; 1271 if (conf_lock) 1272 *conf_lock = li; 1273 return true; 1274 } 1275 return false; 1276 } 1277 1278 bool 1279 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 1280 __u8 type, __u16 flags, 1281 struct cifsLockInfo **conf_lock, int rw_check) 1282 { 1283 bool rc = false; 1284 struct cifs_fid_locks *cur; 1285 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1286 1287 list_for_each_entry(cur, &cinode->llist, llist) { 1288 rc = cifs_find_fid_lock_conflict(cur, offset, length, type, 1289 flags, cfile, conf_lock, 1290 rw_check); 1291 if (rc) 1292 break; 1293 } 1294 1295 return rc; 1296 } 1297 1298 /* 1299 * Check if there is another lock that prevents us to set the lock (mandatory 1300 * style). If such a lock exists, update the flock structure with its 1301 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks 1302 * or leave it the same if we can't. Returns 0 if we don't need to request to 1303 * the server or 1 otherwise. 
1304 */ 1305 static int 1306 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 1307 __u8 type, struct file_lock *flock) 1308 { 1309 int rc = 0; 1310 struct cifsLockInfo *conf_lock; 1311 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1312 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 1313 bool exist; 1314 1315 down_read(&cinode->lock_sem); 1316 1317 exist = cifs_find_lock_conflict(cfile, offset, length, type, 1318 flock->c.flc_flags, &conf_lock, 1319 CIFS_LOCK_OP); 1320 if (exist) { 1321 flock->fl_start = conf_lock->offset; 1322 flock->fl_end = conf_lock->offset + conf_lock->length - 1; 1323 flock->c.flc_pid = conf_lock->pid; 1324 if (conf_lock->type & server->vals->shared_lock_type) 1325 flock->c.flc_type = F_RDLCK; 1326 else 1327 flock->c.flc_type = F_WRLCK; 1328 } else if (!cinode->can_cache_brlcks) 1329 rc = 1; 1330 else 1331 flock->c.flc_type = F_UNLCK; 1332 1333 up_read(&cinode->lock_sem); 1334 return rc; 1335 } 1336 1337 static void 1338 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) 1339 { 1340 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1341 cifs_down_write(&cinode->lock_sem); 1342 list_add_tail(&lock->llist, &cfile->llist->locks); 1343 up_write(&cinode->lock_sem); 1344 } 1345 1346 /* 1347 * Set the byte-range lock (mandatory style). Returns: 1348 * 1) 0, if we set the lock and don't need to request to the server; 1349 * 2) 1, if no locks prevent us but we need to request to the server; 1350 * 3) -EACCES, if there is a lock that prevents us and wait is false. 1351 */ 1352 static int 1353 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, 1354 bool wait) 1355 { 1356 struct cifsLockInfo *conf_lock; 1357 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1358 bool exist; 1359 int rc = 0; 1360 1361 try_again: 1362 exist = false; 1363 cifs_down_write(&cinode->lock_sem); 1364 1365 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, 1366 lock->type, lock->flags, &conf_lock, 1367 CIFS_LOCK_OP); 1368 if (!exist && cinode->can_cache_brlcks) { 1369 list_add_tail(&lock->llist, &cfile->llist->locks); 1370 up_write(&cinode->lock_sem); 1371 return rc; 1372 } 1373 1374 if (!exist) 1375 rc = 1; 1376 else if (!wait) 1377 rc = -EACCES; 1378 else { 1379 list_add_tail(&lock->blist, &conf_lock->blist); 1380 up_write(&cinode->lock_sem); 1381 rc = wait_event_interruptible(lock->block_q, 1382 (lock->blist.prev == &lock->blist) && 1383 (lock->blist.next == &lock->blist)); 1384 if (!rc) 1385 goto try_again; 1386 cifs_down_write(&cinode->lock_sem); 1387 list_del_init(&lock->blist); 1388 } 1389 1390 up_write(&cinode->lock_sem); 1391 return rc; 1392 } 1393 1394 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1395 /* 1396 * Check if there is another lock that prevents us to set the lock (posix 1397 * style). If such a lock exists, update the flock structure with its 1398 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks 1399 * or leave it the same if we can't. Returns 0 if we don't need to request to 1400 * the server or 1 otherwise. 
1401 */ 1402 static int 1403 cifs_posix_lock_test(struct file *file, struct file_lock *flock) 1404 { 1405 int rc = 0; 1406 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1407 unsigned char saved_type = flock->c.flc_type; 1408 1409 if ((flock->c.flc_flags & FL_POSIX) == 0) 1410 return 1; 1411 1412 down_read(&cinode->lock_sem); 1413 posix_test_lock(file, flock); 1414 1415 if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) { 1416 flock->c.flc_type = saved_type; 1417 rc = 1; 1418 } 1419 1420 up_read(&cinode->lock_sem); 1421 return rc; 1422 } 1423 1424 /* 1425 * Set the byte-range lock (posix style). Returns: 1426 * 1) <0, if the error occurs while setting the lock; 1427 * 2) 0, if we set the lock and don't need to request to the server; 1428 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock; 1429 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server. 1430 */ 1431 static int 1432 cifs_posix_lock_set(struct file *file, struct file_lock *flock) 1433 { 1434 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1435 int rc = FILE_LOCK_DEFERRED + 1; 1436 1437 if ((flock->c.flc_flags & FL_POSIX) == 0) 1438 return rc; 1439 1440 cifs_down_write(&cinode->lock_sem); 1441 if (!cinode->can_cache_brlcks) { 1442 up_write(&cinode->lock_sem); 1443 return rc; 1444 } 1445 1446 rc = posix_lock_file(file, flock, NULL); 1447 up_write(&cinode->lock_sem); 1448 return rc; 1449 } 1450 1451 int 1452 cifs_push_mandatory_locks(struct cifsFileInfo *cfile) 1453 { 1454 unsigned int xid; 1455 int rc = 0, stored_rc; 1456 struct cifsLockInfo *li, *tmp; 1457 struct cifs_tcon *tcon; 1458 unsigned int num, max_num, max_buf; 1459 LOCKING_ANDX_RANGE *buf, *cur; 1460 static const int types[] = { 1461 LOCKING_ANDX_LARGE_FILES, 1462 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1463 }; 1464 int i; 1465 1466 xid = get_xid(); 1467 tcon = tlink_tcon(cfile->tlink); 1468 1469 /* 1470 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1471 * and check it before using. 
1472 */ 1473 max_buf = tcon->ses->server->maxBuf; 1474 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { 1475 free_xid(xid); 1476 return -EINVAL; 1477 } 1478 1479 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1480 PAGE_SIZE); 1481 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1482 PAGE_SIZE); 1483 max_num = (max_buf - sizeof(struct smb_hdr)) / 1484 sizeof(LOCKING_ANDX_RANGE); 1485 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1486 if (!buf) { 1487 free_xid(xid); 1488 return -ENOMEM; 1489 } 1490 1491 for (i = 0; i < 2; i++) { 1492 cur = buf; 1493 num = 0; 1494 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1495 if (li->type != types[i]) 1496 continue; 1497 cur->Pid = cpu_to_le16(li->pid); 1498 cur->LengthLow = cpu_to_le32((u32)li->length); 1499 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1500 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1501 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1502 if (++num == max_num) { 1503 stored_rc = cifs_lockv(xid, tcon, 1504 cfile->fid.netfid, 1505 (__u8)li->type, 0, num, 1506 buf); 1507 if (stored_rc) 1508 rc = stored_rc; 1509 cur = buf; 1510 num = 0; 1511 } else 1512 cur++; 1513 } 1514 1515 if (num) { 1516 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1517 (__u8)types[i], 0, num, buf); 1518 if (stored_rc) 1519 rc = stored_rc; 1520 } 1521 } 1522 1523 kfree(buf); 1524 free_xid(xid); 1525 return rc; 1526 } 1527 1528 static __u32 1529 hash_lockowner(fl_owner_t owner) 1530 { 1531 return cifs_lock_secret ^ hash32_ptr((const void *)owner); 1532 } 1533 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1534 1535 struct lock_to_push { 1536 struct list_head llist; 1537 __u64 offset; 1538 __u64 length; 1539 __u32 pid; 1540 __u16 netfid; 1541 __u8 type; 1542 }; 1543 1544 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1545 static int 1546 cifs_push_posix_locks(struct cifsFileInfo *cfile) 1547 { 1548 struct inode *inode = d_inode(cfile->dentry); 1549 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1550 struct file_lock *flock; 1551 struct file_lock_context *flctx = locks_inode_context(inode); 1552 unsigned int count = 0, i; 1553 int rc = 0, xid, type; 1554 struct list_head locks_to_send, *el; 1555 struct lock_to_push *lck, *tmp; 1556 __u64 length; 1557 1558 xid = get_xid(); 1559 1560 if (!flctx) 1561 goto out; 1562 1563 spin_lock(&flctx->flc_lock); 1564 list_for_each(el, &flctx->flc_posix) { 1565 count++; 1566 } 1567 spin_unlock(&flctx->flc_lock); 1568 1569 INIT_LIST_HEAD(&locks_to_send); 1570 1571 /* 1572 * Allocating count locks is enough because no FL_POSIX locks can be 1573 * added to the list while we are holding cinode->lock_sem that 1574 * protects locking operations of this inode. 1575 */ 1576 for (i = 0; i < count; i++) { 1577 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); 1578 if (!lck) { 1579 rc = -ENOMEM; 1580 goto err_out; 1581 } 1582 list_add_tail(&lck->llist, &locks_to_send); 1583 } 1584 1585 el = locks_to_send.next; 1586 spin_lock(&flctx->flc_lock); 1587 for_each_file_lock(flock, &flctx->flc_posix) { 1588 unsigned char ftype = flock->c.flc_type; 1589 1590 if (el == &locks_to_send) { 1591 /* 1592 * The list ended. We don't have enough allocated 1593 * structures - something is really wrong. 
1594 */ 1595 cifs_dbg(VFS, "Can't push all brlocks!\n"); 1596 break; 1597 } 1598 length = cifs_flock_len(flock); 1599 if (ftype == F_RDLCK || ftype == F_SHLCK) 1600 type = CIFS_RDLCK; 1601 else 1602 type = CIFS_WRLCK; 1603 lck = list_entry(el, struct lock_to_push, llist); 1604 lck->pid = hash_lockowner(flock->c.flc_owner); 1605 lck->netfid = cfile->fid.netfid; 1606 lck->length = length; 1607 lck->type = type; 1608 lck->offset = flock->fl_start; 1609 } 1610 spin_unlock(&flctx->flc_lock); 1611 1612 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1613 int stored_rc; 1614 1615 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid, 1616 lck->offset, lck->length, NULL, 1617 lck->type, 0); 1618 if (stored_rc) 1619 rc = stored_rc; 1620 list_del(&lck->llist); 1621 kfree(lck); 1622 } 1623 1624 out: 1625 free_xid(xid); 1626 return rc; 1627 err_out: 1628 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1629 list_del(&lck->llist); 1630 kfree(lck); 1631 } 1632 goto out; 1633 } 1634 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1635 1636 static int 1637 cifs_push_locks(struct cifsFileInfo *cfile) 1638 { 1639 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1640 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1641 int rc = 0; 1642 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1643 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 1644 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1645 1646 /* we are going to update can_cache_brlcks here - need a write access */ 1647 cifs_down_write(&cinode->lock_sem); 1648 if (!cinode->can_cache_brlcks) { 1649 up_write(&cinode->lock_sem); 1650 return rc; 1651 } 1652 1653 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1654 if (cap_unix(tcon->ses) && 1655 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 1656 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 1657 rc = cifs_push_posix_locks(cfile); 1658 else 1659 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1660 rc = tcon->ses->server->ops->push_mand_locks(cfile); 1661 1662 cinode->can_cache_brlcks = false; 1663 up_write(&cinode->lock_sem); 1664 return rc; 1665 } 1666 1667 static void 1668 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, 1669 bool *wait_flag, struct TCP_Server_Info *server) 1670 { 1671 if (flock->c.flc_flags & FL_POSIX) 1672 cifs_dbg(FYI, "Posix\n"); 1673 if (flock->c.flc_flags & FL_FLOCK) 1674 cifs_dbg(FYI, "Flock\n"); 1675 if (flock->c.flc_flags & FL_SLEEP) { 1676 cifs_dbg(FYI, "Blocking lock\n"); 1677 *wait_flag = true; 1678 } 1679 if (flock->c.flc_flags & FL_ACCESS) 1680 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n"); 1681 if (flock->c.flc_flags & FL_LEASE) 1682 cifs_dbg(FYI, "Lease on file - not implemented yet\n"); 1683 if (flock->c.flc_flags & 1684 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | 1685 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK))) 1686 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", 1687 flock->c.flc_flags); 1688 1689 *type = server->vals->large_lock_type; 1690 if (lock_is_write(flock)) { 1691 cifs_dbg(FYI, "F_WRLCK\n"); 1692 *type |= server->vals->exclusive_lock_type; 1693 *lock = 1; 1694 } else if (lock_is_unlock(flock)) { 1695 cifs_dbg(FYI, "F_UNLCK\n"); 1696 *type |= server->vals->unlock_lock_type; 1697 *unlock = 1; 1698 /* Check if unlock includes more than one lock range */ 1699 } else if (lock_is_read(flock)) { 1700 cifs_dbg(FYI, "F_RDLCK\n"); 1701 *type |= server->vals->shared_lock_type; 1702 *lock = 1; 1703 } else if (flock->c.flc_type == F_EXLCK) { 1704 
cifs_dbg(FYI, "F_EXLCK\n"); 1705 *type |= server->vals->exclusive_lock_type; 1706 *lock = 1; 1707 } else if (flock->c.flc_type == F_SHLCK) { 1708 cifs_dbg(FYI, "F_SHLCK\n"); 1709 *type |= server->vals->shared_lock_type; 1710 *lock = 1; 1711 } else 1712 cifs_dbg(FYI, "Unknown type of lock\n"); 1713 } 1714 1715 static int 1716 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, 1717 bool wait_flag, bool posix_lck, unsigned int xid) 1718 { 1719 int rc = 0; 1720 __u64 length = cifs_flock_len(flock); 1721 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1722 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1723 struct TCP_Server_Info *server = tcon->ses->server; 1724 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1725 __u16 netfid = cfile->fid.netfid; 1726 1727 if (posix_lck) { 1728 int posix_lock_type; 1729 1730 rc = cifs_posix_lock_test(file, flock); 1731 if (!rc) 1732 return rc; 1733 1734 if (type & server->vals->shared_lock_type) 1735 posix_lock_type = CIFS_RDLCK; 1736 else 1737 posix_lock_type = CIFS_WRLCK; 1738 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1739 hash_lockowner(flock->c.flc_owner), 1740 flock->fl_start, length, flock, 1741 posix_lock_type, wait_flag); 1742 return rc; 1743 } 1744 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1745 1746 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock); 1747 if (!rc) 1748 return rc; 1749 1750 /* BB we could chain these into one lock request BB */ 1751 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, 1752 1, 0, false); 1753 if (rc == 0) { 1754 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1755 type, 0, 1, false); 1756 flock->c.flc_type = F_UNLCK; 1757 if (rc != 0) 1758 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1759 rc); 1760 return 0; 1761 } 1762 1763 if (type & server->vals->shared_lock_type) { 1764 flock->c.flc_type = F_WRLCK; 1765 return 0; 1766 } 1767 1768 type &= ~server->vals->exclusive_lock_type; 1769 1770 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1771 type | server->vals->shared_lock_type, 1772 1, 0, false); 1773 if (rc == 0) { 1774 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1775 type | server->vals->shared_lock_type, 0, 1, false); 1776 flock->c.flc_type = F_RDLCK; 1777 if (rc != 0) 1778 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1779 rc); 1780 } else 1781 flock->c.flc_type = F_WRLCK; 1782 1783 return 0; 1784 } 1785 1786 void 1787 cifs_move_llist(struct list_head *source, struct list_head *dest) 1788 { 1789 struct list_head *li, *tmp; 1790 list_for_each_safe(li, tmp, source) 1791 list_move(li, dest); 1792 } 1793 1794 void 1795 cifs_free_llist(struct list_head *llist) 1796 { 1797 struct cifsLockInfo *li, *tmp; 1798 list_for_each_entry_safe(li, tmp, llist, llist) { 1799 cifs_del_lock_waiters(li); 1800 list_del(&li->llist); 1801 kfree(li); 1802 } 1803 } 1804 1805 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1806 int 1807 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, 1808 unsigned int xid) 1809 { 1810 int rc = 0, stored_rc; 1811 static const int types[] = { 1812 LOCKING_ANDX_LARGE_FILES, 1813 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1814 }; 1815 unsigned int i; 1816 unsigned int max_num, num, max_buf; 1817 LOCKING_ANDX_RANGE *buf, *cur; 1818 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1819 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1820 struct cifsLockInfo *li, *tmp; 1821 
__u64 length = cifs_flock_len(flock); 1822 struct list_head tmp_llist; 1823 1824 INIT_LIST_HEAD(&tmp_llist); 1825 1826 /* 1827 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1828 * and check it before using. 1829 */ 1830 max_buf = tcon->ses->server->maxBuf; 1831 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) 1832 return -EINVAL; 1833 1834 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1835 PAGE_SIZE); 1836 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1837 PAGE_SIZE); 1838 max_num = (max_buf - sizeof(struct smb_hdr)) / 1839 sizeof(LOCKING_ANDX_RANGE); 1840 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1841 if (!buf) 1842 return -ENOMEM; 1843 1844 cifs_down_write(&cinode->lock_sem); 1845 for (i = 0; i < 2; i++) { 1846 cur = buf; 1847 num = 0; 1848 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1849 if (flock->fl_start > li->offset || 1850 (flock->fl_start + length) < 1851 (li->offset + li->length)) 1852 continue; 1853 if (current->tgid != li->pid) 1854 continue; 1855 if (types[i] != li->type) 1856 continue; 1857 if (cinode->can_cache_brlcks) { 1858 /* 1859 * We can cache brlock requests - simply remove 1860 * a lock from the file's list. 1861 */ 1862 list_del(&li->llist); 1863 cifs_del_lock_waiters(li); 1864 kfree(li); 1865 continue; 1866 } 1867 cur->Pid = cpu_to_le16(li->pid); 1868 cur->LengthLow = cpu_to_le32((u32)li->length); 1869 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1870 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1871 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1872 /* 1873 * We need to save a lock here to let us add it again to 1874 * the file's list if the unlock range request fails on 1875 * the server. 1876 */ 1877 list_move(&li->llist, &tmp_llist); 1878 if (++num == max_num) { 1879 stored_rc = cifs_lockv(xid, tcon, 1880 cfile->fid.netfid, 1881 li->type, num, 0, buf); 1882 if (stored_rc) { 1883 /* 1884 * We failed on the unlock range 1885 * request - add all locks from the tmp 1886 * list to the head of the file's list. 1887 */ 1888 cifs_move_llist(&tmp_llist, 1889 &cfile->llist->locks); 1890 rc = stored_rc; 1891 } else 1892 /* 1893 * The unlock range request succeed - 1894 * free the tmp list. 
1895 */ 1896 cifs_free_llist(&tmp_llist); 1897 cur = buf; 1898 num = 0; 1899 } else 1900 cur++; 1901 } 1902 if (num) { 1903 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1904 types[i], num, 0, buf); 1905 if (stored_rc) { 1906 cifs_move_llist(&tmp_llist, 1907 &cfile->llist->locks); 1908 rc = stored_rc; 1909 } else 1910 cifs_free_llist(&tmp_llist); 1911 } 1912 } 1913 1914 up_write(&cinode->lock_sem); 1915 kfree(buf); 1916 return rc; 1917 } 1918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1919 1920 static int 1921 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, 1922 bool wait_flag, bool posix_lck, int lock, int unlock, 1923 unsigned int xid) 1924 { 1925 int rc = 0; 1926 __u64 length = cifs_flock_len(flock); 1927 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1928 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1929 struct TCP_Server_Info *server = tcon->ses->server; 1930 struct inode *inode = d_inode(cfile->dentry); 1931 1932 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1933 if (posix_lck) { 1934 int posix_lock_type; 1935 1936 rc = cifs_posix_lock_set(file, flock); 1937 if (rc <= FILE_LOCK_DEFERRED) 1938 return rc; 1939 1940 if (type & server->vals->shared_lock_type) 1941 posix_lock_type = CIFS_RDLCK; 1942 else 1943 posix_lock_type = CIFS_WRLCK; 1944 1945 if (unlock == 1) 1946 posix_lock_type = CIFS_UNLCK; 1947 1948 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, 1949 hash_lockowner(flock->c.flc_owner), 1950 flock->fl_start, length, 1951 NULL, posix_lock_type, wait_flag); 1952 goto out; 1953 } 1954 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1955 if (lock) { 1956 struct cifsLockInfo *lock; 1957 1958 lock = cifs_lock_init(flock->fl_start, length, type, 1959 flock->c.flc_flags); 1960 if (!lock) 1961 return -ENOMEM; 1962 1963 rc = cifs_lock_add_if(cfile, lock, wait_flag); 1964 if (rc < 0) { 1965 kfree(lock); 1966 return rc; 1967 } 1968 if (!rc) 1969 goto out; 1970 1971 /* 1972 * Windows 7 server can delay breaking lease from read to None 1973 * if we set a byte-range lock on a file - break it explicitly 1974 * before sending the lock to the server to be sure the next 1975 * read won't conflict with non-overlapted locks due to 1976 * pagereading. 
1977 */ 1978 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && 1979 CIFS_CACHE_READ(CIFS_I(inode))) { 1980 cifs_zap_mapping(inode); 1981 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", 1982 inode); 1983 CIFS_I(inode)->oplock = 0; 1984 } 1985 1986 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1987 type, 1, 0, wait_flag); 1988 if (rc) { 1989 kfree(lock); 1990 return rc; 1991 } 1992 1993 cifs_lock_add(cfile, lock); 1994 } else if (unlock) 1995 rc = server->ops->mand_unlock_range(cfile, flock, xid); 1996 1997 out: 1998 if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) { 1999 /* 2000 * If this is a request to remove all locks because we 2001 * are closing the file, it doesn't matter if the 2002 * unlocking failed as both cifs.ko and the SMB server 2003 * remove the lock on file close 2004 */ 2005 if (rc) { 2006 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); 2007 if (!(flock->c.flc_flags & FL_CLOSE)) 2008 return rc; 2009 } 2010 rc = locks_lock_file_wait(file, flock); 2011 } 2012 return rc; 2013 } 2014 2015 int cifs_flock(struct file *file, int cmd, struct file_lock *fl) 2016 { 2017 int rc, xid; 2018 int lock = 0, unlock = 0; 2019 bool wait_flag = false; 2020 bool posix_lck = false; 2021 struct cifs_sb_info *cifs_sb; 2022 struct cifs_tcon *tcon; 2023 struct cifsFileInfo *cfile; 2024 __u32 type; 2025 2026 xid = get_xid(); 2027 2028 if (!(fl->c.flc_flags & FL_FLOCK)) { 2029 rc = -ENOLCK; 2030 free_xid(xid); 2031 return rc; 2032 } 2033 2034 cfile = (struct cifsFileInfo *)file->private_data; 2035 tcon = tlink_tcon(cfile->tlink); 2036 2037 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, 2038 tcon->ses->server); 2039 cifs_sb = CIFS_FILE_SB(file); 2040 2041 if (cap_unix(tcon->ses) && 2042 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2043 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2044 posix_lck = true; 2045 2046 if (!lock && !unlock) { 2047 /* 2048 * if no lock or unlock then nothing to do since we do not 2049 * know what it is 2050 */ 2051 rc = -EOPNOTSUPP; 2052 free_xid(xid); 2053 return rc; 2054 } 2055 2056 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, 2057 xid); 2058 free_xid(xid); 2059 return rc; 2060 2061 2062 } 2063 2064 int cifs_lock(struct file *file, int cmd, struct file_lock *flock) 2065 { 2066 int rc, xid; 2067 int lock = 0, unlock = 0; 2068 bool wait_flag = false; 2069 bool posix_lck = false; 2070 struct cifs_sb_info *cifs_sb; 2071 struct cifs_tcon *tcon; 2072 struct cifsFileInfo *cfile; 2073 __u32 type; 2074 2075 rc = -EACCES; 2076 xid = get_xid(); 2077 2078 cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd, 2079 flock->c.flc_flags, flock->c.flc_type, 2080 (long long)flock->fl_start, 2081 (long long)flock->fl_end); 2082 2083 cfile = (struct cifsFileInfo *)file->private_data; 2084 tcon = tlink_tcon(cfile->tlink); 2085 2086 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, 2087 tcon->ses->server); 2088 cifs_sb = CIFS_FILE_SB(file); 2089 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); 2090 2091 if (cap_unix(tcon->ses) && 2092 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2093 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2094 posix_lck = true; 2095 /* 2096 * BB add code here to normalize offset and length to account for 2097 * negative length which we can not accept over the wire. 
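 *
 * Illustrative note (not from the original source): this entry point is
 * what a POSIX byte-range lock issued through fcntl(2) on a CIFS/SMB mount
 * ends up in.  A minimal userspace sketch (path and sizes are only
 * examples):
 *
 *	int fd = open("/mnt/cifs/file", O_RDWR);
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,	// exclusive byte-range lock
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 4096,	// first 4 KiB of the file
 *	};
 *	if (fcntl(fd, F_SETLKW, &fl) == -1)	// blocking lock request
 *		perror("F_SETLKW");
 *	fl.l_type = F_UNLCK;
 *	fcntl(fd, F_SETLK, &fl);		// unlock, also via this function
 *
 * An F_GETLK request takes the cifs_getlk() branch below instead of
 * cifs_setlk().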
2098 */ 2099 if (IS_GETLK(cmd)) { 2100 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); 2101 free_xid(xid); 2102 return rc; 2103 } 2104 2105 if (!lock && !unlock) { 2106 /* 2107 * if no lock or unlock then nothing to do since we do not 2108 * know what it is 2109 */ 2110 free_xid(xid); 2111 return -EOPNOTSUPP; 2112 } 2113 2114 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, 2115 xid); 2116 free_xid(xid); 2117 return rc; 2118 } 2119 2120 /* 2121 * update the file size (if needed) after a write. Should be called with 2122 * the inode->i_lock held 2123 */ 2124 void 2125 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 2126 unsigned int bytes_written) 2127 { 2128 loff_t end_of_write = offset + bytes_written; 2129 2130 if (end_of_write > cifsi->netfs.remote_i_size) 2131 netfs_resize_file(&cifsi->netfs, end_of_write, true); 2132 } 2133 2134 static ssize_t 2135 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, 2136 size_t write_size, loff_t *offset) 2137 { 2138 int rc = 0; 2139 unsigned int bytes_written = 0; 2140 unsigned int total_written; 2141 struct cifs_tcon *tcon; 2142 struct TCP_Server_Info *server; 2143 unsigned int xid; 2144 struct dentry *dentry = open_file->dentry; 2145 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); 2146 struct cifs_io_parms io_parms = {0}; 2147 2148 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", 2149 write_size, *offset, dentry); 2150 2151 tcon = tlink_tcon(open_file->tlink); 2152 server = tcon->ses->server; 2153 2154 if (!server->ops->sync_write) 2155 return -ENOSYS; 2156 2157 xid = get_xid(); 2158 2159 for (total_written = 0; write_size > total_written; 2160 total_written += bytes_written) { 2161 rc = -EAGAIN; 2162 while (rc == -EAGAIN) { 2163 struct kvec iov[2]; 2164 unsigned int len; 2165 2166 if (open_file->invalidHandle) { 2167 /* we could deadlock if we called 2168 filemap_fdatawait from here so tell 2169 reopen_file not to flush data to 2170 server now */ 2171 rc = cifs_reopen_file(open_file, false); 2172 if (rc != 0) 2173 break; 2174 } 2175 2176 len = min(server->ops->wp_retry_size(d_inode(dentry)), 2177 (unsigned int)write_size - total_written); 2178 /* iov[0] is reserved for smb header */ 2179 iov[1].iov_base = (char *)write_data + total_written; 2180 iov[1].iov_len = len; 2181 io_parms.pid = pid; 2182 io_parms.tcon = tcon; 2183 io_parms.offset = *offset; 2184 io_parms.length = len; 2185 rc = server->ops->sync_write(xid, &open_file->fid, 2186 &io_parms, &bytes_written, iov, 1); 2187 } 2188 if (rc || (bytes_written == 0)) { 2189 if (total_written) 2190 break; 2191 else { 2192 free_xid(xid); 2193 return rc; 2194 } 2195 } else { 2196 spin_lock(&d_inode(dentry)->i_lock); 2197 cifs_update_eof(cifsi, *offset, bytes_written); 2198 spin_unlock(&d_inode(dentry)->i_lock); 2199 *offset += bytes_written; 2200 } 2201 } 2202 2203 cifs_stats_bytes_written(tcon, total_written); 2204 2205 if (total_written > 0) { 2206 spin_lock(&d_inode(dentry)->i_lock); 2207 if (*offset > d_inode(dentry)->i_size) { 2208 i_size_write(d_inode(dentry), *offset); 2209 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9; 2210 } 2211 spin_unlock(&d_inode(dentry)->i_lock); 2212 } 2213 mark_inode_dirty_sync(d_inode(dentry)); 2214 free_xid(xid); 2215 return total_written; 2216 } 2217 2218 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, 2219 bool fsuid_only) 2220 { 2221 struct cifsFileInfo *open_file = NULL; 2222 struct cifs_sb_info *cifs_sb = 
CIFS_SB(cifs_inode->netfs.inode.i_sb); 2223 2224 /* only filter by fsuid on multiuser mounts */ 2225 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2226 fsuid_only = false; 2227 2228 spin_lock(&cifs_inode->open_file_lock); 2229 /* we could simply get the first_list_entry since write-only entries 2230 are always at the end of the list but since the first entry might 2231 have a close pending, we go through the whole list */ 2232 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2233 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2234 continue; 2235 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 2236 if ((!open_file->invalidHandle)) { 2237 /* found a good file */ 2238 /* lock it so it will not be closed on us */ 2239 cifsFileInfo_get(open_file); 2240 spin_unlock(&cifs_inode->open_file_lock); 2241 return open_file; 2242 } /* else might as well continue, and look for 2243 another, or simply have the caller reopen it 2244 again rather than trying to fix this handle */ 2245 } else /* write only file */ 2246 break; /* write only files are last so must be done */ 2247 } 2248 spin_unlock(&cifs_inode->open_file_lock); 2249 return NULL; 2250 } 2251 2252 /* Return -EBADF if no handle is found and general rc otherwise */ 2253 int 2254 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, 2255 struct cifsFileInfo **ret_file) 2256 { 2257 struct cifsFileInfo *open_file, *inv_file = NULL; 2258 struct cifs_sb_info *cifs_sb; 2259 bool any_available = false; 2260 int rc = -EBADF; 2261 unsigned int refind = 0; 2262 bool fsuid_only = flags & FIND_WR_FSUID_ONLY; 2263 bool with_delete = flags & FIND_WR_WITH_DELETE; 2264 *ret_file = NULL; 2265 2266 /* 2267 * Having a null inode here (because mapping->host was set to zero by 2268 * the VFS or MM) should not happen but we had reports of an oops (due 2269 * to it being zero) during stress testcases so we need to check for it 2270 */ 2271 2272 if (cifs_inode == NULL) { 2273 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n"); 2274 dump_stack(); 2275 return rc; 2276 } 2277 2278 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2279 2280 /* only filter by fsuid on multiuser mounts */ 2281 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2282 fsuid_only = false; 2283 2284 spin_lock(&cifs_inode->open_file_lock); 2285 refind_writable: 2286 if (refind > MAX_REOPEN_ATT) { 2287 spin_unlock(&cifs_inode->open_file_lock); 2288 return rc; 2289 } 2290 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2291 if (!any_available && open_file->pid != current->tgid) 2292 continue; 2293 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2294 continue; 2295 if (with_delete && !(open_file->fid.access & DELETE)) 2296 continue; 2297 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 2298 if (!open_file->invalidHandle) { 2299 /* found a good writable file */ 2300 cifsFileInfo_get(open_file); 2301 spin_unlock(&cifs_inode->open_file_lock); 2302 *ret_file = open_file; 2303 return 0; 2304 } else { 2305 if (!inv_file) 2306 inv_file = open_file; 2307 } 2308 } 2309 } 2310 /* couldn't find usable FH with same pid, try any available */ 2311 if (!any_available) { 2312 any_available = true; 2313 goto refind_writable; 2314 } 2315 2316 if (inv_file) { 2317 any_available = false; 2318 cifsFileInfo_get(inv_file); 2319 } 2320 2321 spin_unlock(&cifs_inode->open_file_lock); 2322 2323 if (inv_file) { 2324 rc = cifs_reopen_file(inv_file, false); 2325 if (!rc) { 2326 *ret_file = inv_file; 2327 return 0; 2328 } 2329
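	/*
	 * Reopen failed (descriptive note, not from the original source):
	 * push this stale handle to the back of the openFileList so later
	 * passes prefer other handles, drop our reference, and rescan the
	 * list; the refind counter bounds the retries at MAX_REOPEN_ATT.
	 */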
2330 spin_lock(&cifs_inode->open_file_lock); 2331 list_move_tail(&inv_file->flist, &cifs_inode->openFileList); 2332 spin_unlock(&cifs_inode->open_file_lock); 2333 cifsFileInfo_put(inv_file); 2334 ++refind; 2335 inv_file = NULL; 2336 spin_lock(&cifs_inode->open_file_lock); 2337 goto refind_writable; 2338 } 2339 2340 return rc; 2341 } 2342 2343 struct cifsFileInfo * 2344 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) 2345 { 2346 struct cifsFileInfo *cfile; 2347 int rc; 2348 2349 rc = cifs_get_writable_file(cifs_inode, flags, &cfile); 2350 if (rc) 2351 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc); 2352 2353 return cfile; 2354 } 2355 2356 int 2357 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, 2358 int flags, 2359 struct cifsFileInfo **ret_file) 2360 { 2361 struct cifsFileInfo *cfile; 2362 void *page = alloc_dentry_path(); 2363 2364 *ret_file = NULL; 2365 2366 spin_lock(&tcon->open_file_lock); 2367 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2368 struct cifsInodeInfo *cinode; 2369 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2370 if (IS_ERR(full_path)) { 2371 spin_unlock(&tcon->open_file_lock); 2372 free_dentry_path(page); 2373 return PTR_ERR(full_path); 2374 } 2375 if (strcmp(full_path, name)) 2376 continue; 2377 2378 cinode = CIFS_I(d_inode(cfile->dentry)); 2379 spin_unlock(&tcon->open_file_lock); 2380 free_dentry_path(page); 2381 return cifs_get_writable_file(cinode, flags, ret_file); 2382 } 2383 2384 spin_unlock(&tcon->open_file_lock); 2385 free_dentry_path(page); 2386 return -ENOENT; 2387 } 2388 2389 int 2390 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, 2391 struct cifsFileInfo **ret_file) 2392 { 2393 struct cifsFileInfo *cfile; 2394 void *page = alloc_dentry_path(); 2395 2396 *ret_file = NULL; 2397 2398 spin_lock(&tcon->open_file_lock); 2399 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2400 struct cifsInodeInfo *cinode; 2401 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2402 if (IS_ERR(full_path)) { 2403 spin_unlock(&tcon->open_file_lock); 2404 free_dentry_path(page); 2405 return PTR_ERR(full_path); 2406 } 2407 if (strcmp(full_path, name)) 2408 continue; 2409 2410 cinode = CIFS_I(d_inode(cfile->dentry)); 2411 spin_unlock(&tcon->open_file_lock); 2412 free_dentry_path(page); 2413 *ret_file = find_readable_file(cinode, 0); 2414 return *ret_file ? 0 : -ENOENT; 2415 } 2416 2417 spin_unlock(&tcon->open_file_lock); 2418 free_dentry_path(page); 2419 return -ENOENT; 2420 } 2421 2422 void 2423 cifs_writedata_release(struct kref *refcount) 2424 { 2425 struct cifs_writedata *wdata = container_of(refcount, 2426 struct cifs_writedata, refcount); 2427 #ifdef CONFIG_CIFS_SMB_DIRECT 2428 if (wdata->mr) { 2429 smbd_deregister_mr(wdata->mr); 2430 wdata->mr = NULL; 2431 } 2432 #endif 2433 2434 if (wdata->cfile) 2435 cifsFileInfo_put(wdata->cfile); 2436 2437 kfree(wdata); 2438 } 2439 2440 /* 2441 * Write failed with a retryable error. Resend the write request. It's also 2442 * possible that the page was redirtied so re-clean the page. 
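 *
 * Illustrative example (not from the original source): if 1 MiB is left to
 * resend and the server's wp_retry_size() now reports 64 KiB, the loop
 * below issues sixteen 64 KiB requests, each clipped to whole pages:
 *
 *	cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
 *
 * If wsize drops below PAGE_SIZE while more than wsize remains, the resend
 * is abandoned with -EOPNOTSUPP.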
2443 */ 2444 static void 2445 cifs_writev_requeue(struct cifs_writedata *wdata) 2446 { 2447 int rc = 0; 2448 struct inode *inode = d_inode(wdata->cfile->dentry); 2449 struct TCP_Server_Info *server; 2450 unsigned int rest_len = wdata->bytes; 2451 loff_t fpos = wdata->offset; 2452 2453 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2454 do { 2455 struct cifs_writedata *wdata2; 2456 unsigned int wsize, cur_len; 2457 2458 wsize = server->ops->wp_retry_size(inode); 2459 if (wsize < rest_len) { 2460 if (wsize < PAGE_SIZE) { 2461 rc = -EOPNOTSUPP; 2462 break; 2463 } 2464 cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); 2465 } else { 2466 cur_len = rest_len; 2467 } 2468 2469 wdata2 = cifs_writedata_alloc(cifs_writev_complete); 2470 if (!wdata2) { 2471 rc = -ENOMEM; 2472 break; 2473 } 2474 2475 wdata2->sync_mode = wdata->sync_mode; 2476 wdata2->offset = fpos; 2477 wdata2->bytes = cur_len; 2478 wdata2->iter = wdata->iter; 2479 2480 iov_iter_advance(&wdata2->iter, fpos - wdata->offset); 2481 iov_iter_truncate(&wdata2->iter, wdata2->bytes); 2482 2483 if (iov_iter_is_xarray(&wdata2->iter)) 2484 /* Check for pages having been redirtied and clean 2485 * them. We can do this by walking the xarray. If 2486 * it's not an xarray, then it's a DIO and we shouldn't 2487 * be mucking around with the page bits. 2488 */ 2489 cifs_undirty_folios(inode, fpos, cur_len); 2490 2491 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, 2492 &wdata2->cfile); 2493 if (!wdata2->cfile) { 2494 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", 2495 rc); 2496 if (!is_retryable_error(rc)) 2497 rc = -EBADF; 2498 } else { 2499 wdata2->pid = wdata2->cfile->pid; 2500 rc = server->ops->async_writev(wdata2, 2501 cifs_writedata_release); 2502 } 2503 2504 kref_put(&wdata2->refcount, cifs_writedata_release); 2505 if (rc) { 2506 if (is_retryable_error(rc)) 2507 continue; 2508 fpos += cur_len; 2509 rest_len -= cur_len; 2510 break; 2511 } 2512 2513 fpos += cur_len; 2514 rest_len -= cur_len; 2515 } while (rest_len > 0); 2516 2517 /* Clean up remaining pages from the original wdata */ 2518 if (iov_iter_is_xarray(&wdata->iter)) 2519 cifs_pages_write_failed(inode, fpos, rest_len); 2520 2521 if (rc != 0 && !is_retryable_error(rc)) 2522 mapping_set_error(inode->i_mapping, rc); 2523 kref_put(&wdata->refcount, cifs_writedata_release); 2524 } 2525 2526 void 2527 cifs_writev_complete(struct work_struct *work) 2528 { 2529 struct cifs_writedata *wdata = container_of(work, 2530 struct cifs_writedata, work); 2531 struct inode *inode = d_inode(wdata->cfile->dentry); 2532 2533 if (wdata->result == 0) { 2534 spin_lock(&inode->i_lock); 2535 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2536 spin_unlock(&inode->i_lock); 2537 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2538 wdata->bytes); 2539 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2540 return cifs_writev_requeue(wdata); 2541 2542 if (wdata->result == -EAGAIN) 2543 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); 2544 else if (wdata->result < 0) 2545 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); 2546 else 2547 cifs_pages_written_back(inode, wdata->offset, wdata->bytes); 2548 2549 if (wdata->result != -EAGAIN) 2550 mapping_set_error(inode->i_mapping, wdata->result); 2551 kref_put(&wdata->refcount, cifs_writedata_release); 2552 } 2553 2554 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) 2555 { 2556 struct cifs_writedata *wdata; 2557 2558 wdata = kzalloc(sizeof(*wdata), 
GFP_NOFS); 2559 if (wdata != NULL) { 2560 kref_init(&wdata->refcount); 2561 INIT_LIST_HEAD(&wdata->list); 2562 init_completion(&wdata->done); 2563 INIT_WORK(&wdata->work, complete); 2564 } 2565 return wdata; 2566 } 2567 2568 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) 2569 { 2570 struct address_space *mapping = page->mapping; 2571 loff_t offset = (loff_t)page->index << PAGE_SHIFT; 2572 char *write_data; 2573 int rc = -EFAULT; 2574 int bytes_written = 0; 2575 struct inode *inode; 2576 struct cifsFileInfo *open_file; 2577 2578 if (!mapping || !mapping->host) 2579 return -EFAULT; 2580 2581 inode = page->mapping->host; 2582 2583 offset += (loff_t)from; 2584 write_data = kmap(page); 2585 write_data += from; 2586 2587 if ((to > PAGE_SIZE) || (from > to)) { 2588 kunmap(page); 2589 return -EIO; 2590 } 2591 2592 /* racing with truncate? */ 2593 if (offset > mapping->host->i_size) { 2594 kunmap(page); 2595 return 0; /* don't care */ 2596 } 2597 2598 /* check to make sure that we are not extending the file */ 2599 if (mapping->host->i_size - offset < (loff_t)to) 2600 to = (unsigned)(mapping->host->i_size - offset); 2601 2602 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, 2603 &open_file); 2604 if (!rc) { 2605 bytes_written = cifs_write(open_file, open_file->pid, 2606 write_data, to - from, &offset); 2607 cifsFileInfo_put(open_file); 2608 /* Does mm or vfs already set times? */ 2609 simple_inode_init_ts(inode); 2610 if ((bytes_written > 0) && (offset)) 2611 rc = 0; 2612 else if (bytes_written < 0) 2613 rc = bytes_written; 2614 else 2615 rc = -EFAULT; 2616 } else { 2617 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); 2618 if (!is_retryable_error(rc)) 2619 rc = -EIO; 2620 } 2621 2622 kunmap(page); 2623 return rc; 2624 } 2625 2626 /* 2627 * Extend the region to be written back to include subsequent contiguously 2628 * dirty pages if possible, but don't sleep while doing so. 2629 */ 2630 static void cifs_extend_writeback(struct address_space *mapping, 2631 long *_count, 2632 loff_t start, 2633 int max_pages, 2634 size_t max_len, 2635 unsigned int *_len) 2636 { 2637 struct folio_batch batch; 2638 struct folio *folio; 2639 unsigned int psize, nr_pages; 2640 size_t len = *_len; 2641 pgoff_t index = (start + len) / PAGE_SIZE; 2642 bool stop = true; 2643 unsigned int i; 2644 XA_STATE(xas, &mapping->i_pages, index); 2645 2646 folio_batch_init(&batch); 2647 2648 do { 2649 /* Firstly, we gather up a batch of contiguous dirty pages 2650 * under the RCU read lock - but we can't clear the dirty flags 2651 * there if any of those pages are mapped. 2652 */ 2653 rcu_read_lock(); 2654 2655 xas_for_each(&xas, folio, ULONG_MAX) { 2656 stop = true; 2657 if (xas_retry(&xas, folio)) 2658 continue; 2659 if (xa_is_value(folio)) 2660 break; 2661 if (folio->index != index) 2662 break; 2663 if (!folio_try_get_rcu(folio)) { 2664 xas_reset(&xas); 2665 continue; 2666 } 2667 nr_pages = folio_nr_pages(folio); 2668 if (nr_pages > max_pages) 2669 break; 2670 2671 /* Has the page moved or been split? 
*/ 2672 if (unlikely(folio != xas_reload(&xas))) { 2673 folio_put(folio); 2674 break; 2675 } 2676 2677 if (!folio_trylock(folio)) { 2678 folio_put(folio); 2679 break; 2680 } 2681 if (!folio_test_dirty(folio) || folio_test_writeback(folio)) { 2682 folio_unlock(folio); 2683 folio_put(folio); 2684 break; 2685 } 2686 2687 max_pages -= nr_pages; 2688 psize = folio_size(folio); 2689 len += psize; 2690 stop = false; 2691 if (max_pages <= 0 || len >= max_len || *_count <= 0) 2692 stop = true; 2693 2694 index += nr_pages; 2695 if (!folio_batch_add(&batch, folio)) 2696 break; 2697 if (stop) 2698 break; 2699 } 2700 2701 if (!stop) 2702 xas_pause(&xas); 2703 rcu_read_unlock(); 2704 2705 /* Now, if we obtained any pages, we can shift them to being 2706 * writable and mark them for caching. 2707 */ 2708 if (!folio_batch_count(&batch)) 2709 break; 2710 2711 for (i = 0; i < folio_batch_count(&batch); i++) { 2712 folio = batch.folios[i]; 2713 /* The folio should be locked, dirty and not undergoing 2714 * writeback from the loop above. 2715 */ 2716 if (!folio_clear_dirty_for_io(folio)) 2717 WARN_ON(1); 2718 folio_start_writeback(folio); 2719 2720 *_count -= folio_nr_pages(folio); 2721 folio_unlock(folio); 2722 } 2723 2724 folio_batch_release(&batch); 2725 cond_resched(); 2726 } while (!stop); 2727 2728 *_len = len; 2729 } 2730 2731 /* 2732 * Write back the locked page and any subsequent non-locked dirty pages. 2733 */ 2734 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, 2735 struct writeback_control *wbc, 2736 struct folio *folio, 2737 loff_t start, loff_t end) 2738 { 2739 struct inode *inode = mapping->host; 2740 struct TCP_Server_Info *server; 2741 struct cifs_writedata *wdata; 2742 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2743 struct cifs_credits credits_on_stack; 2744 struct cifs_credits *credits = &credits_on_stack; 2745 struct cifsFileInfo *cfile = NULL; 2746 unsigned int xid, wsize, len; 2747 loff_t i_size = i_size_read(inode); 2748 size_t max_len; 2749 long count = wbc->nr_to_write; 2750 int rc; 2751 2752 /* The folio should be locked, dirty and not undergoing writeback. */ 2753 folio_start_writeback(folio); 2754 2755 count -= folio_nr_pages(folio); 2756 len = folio_size(folio); 2757 2758 xid = get_xid(); 2759 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); 2760 2761 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); 2762 if (rc) { 2763 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); 2764 goto err_xid; 2765 } 2766 2767 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2768 &wsize, credits); 2769 if (rc != 0) 2770 goto err_close; 2771 2772 wdata = cifs_writedata_alloc(cifs_writev_complete); 2773 if (!wdata) { 2774 rc = -ENOMEM; 2775 goto err_uncredit; 2776 } 2777 2778 wdata->sync_mode = wbc->sync_mode; 2779 wdata->offset = folio_pos(folio); 2780 wdata->pid = cfile->pid; 2781 wdata->credits = credits_on_stack; 2782 wdata->cfile = cfile; 2783 wdata->server = server; 2784 cfile = NULL; 2785 2786 /* Find all consecutive lockable dirty pages, stopping when we find a 2787 * page that is not immediately lockable, is not dirty or is missing, 2788 * or we reach the end of the range. 2789 */ 2790 if (start < i_size) { 2791 /* Trim the write to the EOF; the extra data is ignored. Also 2792 * put an upper limit on the size of a single storedata op. 
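 *
 * Illustrative example (not from the original source): with wsize = 1 MiB,
 * a range end of 4 MiB - 1, start = 3 MiB and i_size = 3.5 MiB, max_len
 * below becomes min(1 MiB, end - start + 1, i_size - start) = 512 KiB, so
 * the request is cut off at EOF even if more dirty pages follow.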
2793 */ 2794 max_len = wsize; 2795 max_len = min_t(unsigned long long, max_len, end - start + 1); 2796 max_len = min_t(unsigned long long, max_len, i_size - start); 2797 2798 if (len < max_len) { 2799 int max_pages = INT_MAX; 2800 2801 #ifdef CONFIG_CIFS_SMB_DIRECT 2802 if (server->smbd_conn) 2803 max_pages = server->smbd_conn->max_frmr_depth; 2804 #endif 2805 max_pages -= folio_nr_pages(folio); 2806 2807 if (max_pages > 0) 2808 cifs_extend_writeback(mapping, &count, start, 2809 max_pages, max_len, &len); 2810 } 2811 len = min_t(loff_t, len, max_len); 2812 } 2813 2814 wdata->bytes = len; 2815 2816 /* We now have a contiguous set of dirty pages, each with writeback 2817 * set; the first page is still locked at this point, but all the rest 2818 * have been unlocked. 2819 */ 2820 folio_unlock(folio); 2821 2822 if (start < i_size) { 2823 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, 2824 start, len); 2825 2826 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); 2827 if (rc) 2828 goto err_wdata; 2829 2830 if (wdata->cfile->invalidHandle) 2831 rc = -EAGAIN; 2832 else 2833 rc = wdata->server->ops->async_writev(wdata, 2834 cifs_writedata_release); 2835 if (rc >= 0) { 2836 kref_put(&wdata->refcount, cifs_writedata_release); 2837 goto err_close; 2838 } 2839 } else { 2840 /* The dirty region was entirely beyond the EOF. */ 2841 cifs_pages_written_back(inode, start, len); 2842 rc = 0; 2843 } 2844 2845 err_wdata: 2846 kref_put(&wdata->refcount, cifs_writedata_release); 2847 err_uncredit: 2848 add_credits_and_wake_if(server, credits, 0); 2849 err_close: 2850 if (cfile) 2851 cifsFileInfo_put(cfile); 2852 err_xid: 2853 free_xid(xid); 2854 if (rc == 0) { 2855 wbc->nr_to_write = count; 2856 rc = len; 2857 } else if (is_retryable_error(rc)) { 2858 cifs_pages_write_redirty(inode, start, len); 2859 } else { 2860 cifs_pages_write_failed(inode, start, len); 2861 mapping_set_error(mapping, rc); 2862 } 2863 /* Indication to update ctime and mtime as close is deferred */ 2864 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 2865 return rc; 2866 } 2867 2868 /* 2869 * write a region of pages back to the server 2870 */ 2871 static int cifs_writepages_region(struct address_space *mapping, 2872 struct writeback_control *wbc, 2873 loff_t start, loff_t end, loff_t *_next) 2874 { 2875 struct folio_batch fbatch; 2876 int skips = 0; 2877 2878 folio_batch_init(&fbatch); 2879 do { 2880 int nr; 2881 pgoff_t index = start / PAGE_SIZE; 2882 2883 nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE, 2884 PAGECACHE_TAG_DIRTY, &fbatch); 2885 if (!nr) 2886 break; 2887 2888 for (int i = 0; i < nr; i++) { 2889 ssize_t ret; 2890 struct folio *folio = fbatch.folios[i]; 2891 2892 redo_folio: 2893 start = folio_pos(folio); /* May regress with THPs */ 2894 2895 /* At this point we hold neither the i_pages lock nor the 2896 * page lock: the page may be truncated or invalidated 2897 * (changing page->mapping to NULL), or even swizzled 2898 * back from swapper_space to tmpfs file mapping 2899 */ 2900 if (wbc->sync_mode != WB_SYNC_NONE) { 2901 ret = folio_lock_killable(folio); 2902 if (ret < 0) 2903 goto write_error; 2904 } else { 2905 if (!folio_trylock(folio)) 2906 goto skip_write; 2907 } 2908 2909 if (folio->mapping != mapping || 2910 !folio_test_dirty(folio)) { 2911 start += folio_size(folio); 2912 folio_unlock(folio); 2913 continue; 2914 } 2915 2916 if (folio_test_writeback(folio) || 2917 folio_test_fscache(folio)) { 2918 folio_unlock(folio); 2919 if (wbc->sync_mode == WB_SYNC_NONE) 2920 goto 
skip_write; 2921 2922 folio_wait_writeback(folio); 2923 #ifdef CONFIG_CIFS_FSCACHE 2924 folio_wait_fscache(folio); 2925 #endif 2926 goto redo_folio; 2927 } 2928 2929 if (!folio_clear_dirty_for_io(folio)) 2930 /* We hold the page lock - it should've been dirty. */ 2931 WARN_ON(1); 2932 2933 ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end); 2934 if (ret < 0) 2935 goto write_error; 2936 2937 start += ret; 2938 continue; 2939 2940 write_error: 2941 folio_batch_release(&fbatch); 2942 *_next = start; 2943 return ret; 2944 2945 skip_write: 2946 /* 2947 * Too many skipped writes, or need to reschedule? 2948 * Treat it as a write error without an error code. 2949 */ 2950 if (skips >= 5 || need_resched()) { 2951 ret = 0; 2952 goto write_error; 2953 } 2954 2955 /* Otherwise, just skip that folio and go on to the next */ 2956 skips++; 2957 start += folio_size(folio); 2958 continue; 2959 } 2960 2961 folio_batch_release(&fbatch); 2962 cond_resched(); 2963 } while (wbc->nr_to_write > 0); 2964 2965 *_next = start; 2966 return 0; 2967 } 2968 2969 /* 2970 * Write some of the pending data back to the server 2971 */ 2972 static int cifs_writepages(struct address_space *mapping, 2973 struct writeback_control *wbc) 2974 { 2975 loff_t start, next; 2976 int ret; 2977 2978 /* We have to be careful as we can end up racing with setattr() 2979 * truncating the pagecache since the caller doesn't take a lock here 2980 * to prevent it. 2981 */ 2982 2983 if (wbc->range_cyclic) { 2984 start = mapping->writeback_index * PAGE_SIZE; 2985 ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next); 2986 if (ret == 0) { 2987 mapping->writeback_index = next / PAGE_SIZE; 2988 if (start > 0 && wbc->nr_to_write > 0) { 2989 ret = cifs_writepages_region(mapping, wbc, 0, 2990 start, &next); 2991 if (ret == 0) 2992 mapping->writeback_index = 2993 next / PAGE_SIZE; 2994 } 2995 } 2996 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 2997 ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next); 2998 if (wbc->nr_to_write > 0 && ret == 0) 2999 mapping->writeback_index = next / PAGE_SIZE; 3000 } else { 3001 ret = cifs_writepages_region(mapping, wbc, 3002 wbc->range_start, wbc->range_end, &next); 3003 } 3004 3005 return ret; 3006 } 3007 3008 static int 3009 cifs_writepage_locked(struct page *page, struct writeback_control *wbc) 3010 { 3011 int rc; 3012 unsigned int xid; 3013 3014 xid = get_xid(); 3015 /* BB add check for wbc flags */ 3016 get_page(page); 3017 if (!PageUptodate(page)) 3018 cifs_dbg(FYI, "ppw - page not up to date\n"); 3019 3020 /* 3021 * Set the "writeback" flag, and clear "dirty" in the radix tree. 3022 * 3023 * A writepage() implementation always needs to do either this, 3024 * or re-dirty the page with "redirty_page_for_writepage()" in 3025 * the case of a failure. 3026 * 3027 * Just unlocking the page will cause the radix tree tag-bits 3028 * to fail to update with the state of the page correctly. 
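 *
 * Illustrative note (not from the original source): on a retryable error
 * the code below only loops for data-integrity writeback (WB_SYNC_ALL and
 * -EAGAIN); otherwise the page is handed back via
 * redirty_page_for_writepage() and picked up by a later writeback pass.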
3029 */ 3030 set_page_writeback(page); 3031 retry_write: 3032 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 3033 if (is_retryable_error(rc)) { 3034 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 3035 goto retry_write; 3036 redirty_page_for_writepage(wbc, page); 3037 } else if (rc != 0) { 3038 SetPageError(page); 3039 mapping_set_error(page->mapping, rc); 3040 } else { 3041 SetPageUptodate(page); 3042 } 3043 end_page_writeback(page); 3044 put_page(page); 3045 free_xid(xid); 3046 return rc; 3047 } 3048 3049 static int cifs_write_end(struct file *file, struct address_space *mapping, 3050 loff_t pos, unsigned len, unsigned copied, 3051 struct page *page, void *fsdata) 3052 { 3053 int rc; 3054 struct inode *inode = mapping->host; 3055 struct cifsFileInfo *cfile = file->private_data; 3056 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 3057 struct folio *folio = page_folio(page); 3058 __u32 pid; 3059 3060 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3061 pid = cfile->pid; 3062 else 3063 pid = current->tgid; 3064 3065 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", 3066 page, pos, copied); 3067 3068 if (folio_test_checked(folio)) { 3069 if (copied == len) 3070 folio_mark_uptodate(folio); 3071 folio_clear_checked(folio); 3072 } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) 3073 folio_mark_uptodate(folio); 3074 3075 if (!folio_test_uptodate(folio)) { 3076 char *page_data; 3077 unsigned offset = pos & (PAGE_SIZE - 1); 3078 unsigned int xid; 3079 3080 xid = get_xid(); 3081 /* this is probably better than directly calling 3082 partialpage_write since in this function the file handle is 3083 known which we might as well leverage */ 3084 /* BB check if anything else missing out of ppw 3085 such as updating last write time */ 3086 page_data = kmap(page); 3087 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos); 3088 /* if (rc < 0) should we set writebehind rc? 
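 *
 * Illustrative example (not from the original source, assumes 4 KiB
 * pages): for pos = 0x1234 and copied = 100 on a folio that is not
 * uptodate, offset = pos & (PAGE_SIZE - 1) = 0x234, and the 100 bytes just
 * copied are written synchronously to the server at file offset 0x1234 by
 * the cifs_write() call above, reusing this open file's handle and pid.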
*/ 3089 kunmap(page); 3090 3091 free_xid(xid); 3092 } else { 3093 rc = copied; 3094 pos += copied; 3095 set_page_dirty(page); 3096 } 3097 3098 if (rc > 0) { 3099 spin_lock(&inode->i_lock); 3100 if (pos > inode->i_size) { 3101 i_size_write(inode, pos); 3102 inode->i_blocks = (512 - 1 + pos) >> 9; 3103 } 3104 spin_unlock(&inode->i_lock); 3105 } 3106 3107 unlock_page(page); 3108 put_page(page); 3109 /* Indication to update ctime and mtime as close is deferred */ 3110 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3111 3112 return rc; 3113 } 3114 3115 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, 3116 int datasync) 3117 { 3118 unsigned int xid; 3119 int rc = 0; 3120 struct cifs_tcon *tcon; 3121 struct TCP_Server_Info *server; 3122 struct cifsFileInfo *smbfile = file->private_data; 3123 struct inode *inode = file_inode(file); 3124 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3125 3126 rc = file_write_and_wait_range(file, start, end); 3127 if (rc) { 3128 trace_cifs_fsync_err(inode->i_ino, rc); 3129 return rc; 3130 } 3131 3132 xid = get_xid(); 3133 3134 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3135 file, datasync); 3136 3137 if (!CIFS_CACHE_READ(CIFS_I(inode))) { 3138 rc = cifs_zap_mapping(inode); 3139 if (rc) { 3140 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); 3141 rc = 0; /* don't care about it in fsync */ 3142 } 3143 } 3144 3145 tcon = tlink_tcon(smbfile->tlink); 3146 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3147 server = tcon->ses->server; 3148 if (server->ops->flush == NULL) { 3149 rc = -ENOSYS; 3150 goto strict_fsync_exit; 3151 } 3152 3153 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3154 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3155 if (smbfile) { 3156 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3157 cifsFileInfo_put(smbfile); 3158 } else 3159 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3160 } else 3161 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3162 } 3163 3164 strict_fsync_exit: 3165 free_xid(xid); 3166 return rc; 3167 } 3168 3169 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 3170 { 3171 unsigned int xid; 3172 int rc = 0; 3173 struct cifs_tcon *tcon; 3174 struct TCP_Server_Info *server; 3175 struct cifsFileInfo *smbfile = file->private_data; 3176 struct inode *inode = file_inode(file); 3177 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); 3178 3179 rc = file_write_and_wait_range(file, start, end); 3180 if (rc) { 3181 trace_cifs_fsync_err(file_inode(file)->i_ino, rc); 3182 return rc; 3183 } 3184 3185 xid = get_xid(); 3186 3187 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3188 file, datasync); 3189 3190 tcon = tlink_tcon(smbfile->tlink); 3191 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3192 server = tcon->ses->server; 3193 if (server->ops->flush == NULL) { 3194 rc = -ENOSYS; 3195 goto fsync_exit; 3196 } 3197 3198 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3199 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3200 if (smbfile) { 3201 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3202 cifsFileInfo_put(smbfile); 3203 } else 3204 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3205 } else 3206 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3207 } 3208 3209 fsync_exit: 3210 free_xid(xid); 3211 return rc; 3212 } 3213 3214 /* 3215 * As file closes, flush all cached write data for this inode checking 3216 * for write behind errors. 
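 *
 * Illustrative note (not from the original source): because dirty data may
 * be pushed to the server after write(2) returns, an I/O error can first
 * become visible here or in fsync.  A careful application on a CIFS mount
 * should therefore check the final calls as well, e.g.:
 *
 *	if (write(fd, buf, len) < 0)
 *		err(1, "write");	// may only dirty the page cache
 *	if (fsync(fd) == -1)
 *		err(1, "fsync");	// cifs_fsync()/cifs_strict_fsync()
 *	if (close(fd) == -1)
 *		err(1, "close");	// cifs_flush() reports write-behind errors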
3217 */ 3218 int cifs_flush(struct file *file, fl_owner_t id) 3219 { 3220 struct inode *inode = file_inode(file); 3221 int rc = 0; 3222 3223 if (file->f_mode & FMODE_WRITE) 3224 rc = filemap_write_and_wait(inode->i_mapping); 3225 3226 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); 3227 if (rc) { 3228 /* get more nuanced writeback errors */ 3229 rc = filemap_check_wb_err(file->f_mapping, 0); 3230 trace_cifs_flush_err(inode->i_ino, rc); 3231 } 3232 return rc; 3233 } 3234 3235 static void 3236 cifs_uncached_writedata_release(struct kref *refcount) 3237 { 3238 struct cifs_writedata *wdata = container_of(refcount, 3239 struct cifs_writedata, refcount); 3240 3241 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); 3242 cifs_writedata_release(refcount); 3243 } 3244 3245 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); 3246 3247 static void 3248 cifs_uncached_writev_complete(struct work_struct *work) 3249 { 3250 struct cifs_writedata *wdata = container_of(work, 3251 struct cifs_writedata, work); 3252 struct inode *inode = d_inode(wdata->cfile->dentry); 3253 struct cifsInodeInfo *cifsi = CIFS_I(inode); 3254 3255 spin_lock(&inode->i_lock); 3256 cifs_update_eof(cifsi, wdata->offset, wdata->bytes); 3257 if (cifsi->netfs.remote_i_size > inode->i_size) 3258 i_size_write(inode, cifsi->netfs.remote_i_size); 3259 spin_unlock(&inode->i_lock); 3260 3261 complete(&wdata->done); 3262 collect_uncached_write_data(wdata->ctx); 3263 /* the below call can possibly free the last ref to aio ctx */ 3264 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3265 } 3266 3267 static int 3268 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, 3269 struct cifs_aio_ctx *ctx) 3270 { 3271 unsigned int wsize; 3272 struct cifs_credits credits; 3273 int rc; 3274 struct TCP_Server_Info *server = wdata->server; 3275 3276 do { 3277 if (wdata->cfile->invalidHandle) { 3278 rc = cifs_reopen_file(wdata->cfile, false); 3279 if (rc == -EAGAIN) 3280 continue; 3281 else if (rc) 3282 break; 3283 } 3284 3285 3286 /* 3287 * Wait for credits to resend this wdata. 3288 * Note: we are attempting to resend the whole wdata not in 3289 * segments 3290 */ 3291 do { 3292 rc = server->ops->wait_mtu_credits(server, wdata->bytes, 3293 &wsize, &credits); 3294 if (rc) 3295 goto fail; 3296 3297 if (wsize < wdata->bytes) { 3298 add_credits_and_wake_if(server, &credits, 0); 3299 msleep(1000); 3300 } 3301 } while (wsize < wdata->bytes); 3302 wdata->credits = credits; 3303 3304 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3305 3306 if (!rc) { 3307 if (wdata->cfile->invalidHandle) 3308 rc = -EAGAIN; 3309 else { 3310 wdata->replay = true; 3311 #ifdef CONFIG_CIFS_SMB_DIRECT 3312 if (wdata->mr) { 3313 wdata->mr->need_invalidate = true; 3314 smbd_deregister_mr(wdata->mr); 3315 wdata->mr = NULL; 3316 } 3317 #endif 3318 rc = server->ops->async_writev(wdata, 3319 cifs_uncached_writedata_release); 3320 } 3321 } 3322 3323 /* If the write was successfully sent, we are done */ 3324 if (!rc) { 3325 list_add_tail(&wdata->list, wdata_list); 3326 return 0; 3327 } 3328 3329 /* Roll back credits and retry if needed */ 3330 add_credits_and_wake_if(server, &wdata->credits, 0); 3331 } while (rc == -EAGAIN); 3332 3333 fail: 3334 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3335 return rc; 3336 } 3337 3338 /* 3339 * Select span of a bvec iterator we're going to use. Limit it by both maximum 3340 * size and maximum number of segments. 
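 *
 * Illustrative example (not from the original source): for a BVEC iterator
 * holding segments of 4 KiB, 8 KiB and 4 KiB with max_size = 10 KiB and
 * max_segs = 2, the selected span is 4 KiB + 6 KiB = 10 KiB over two
 * segments (*_nsegs = 2); the remaining 6 KiB are handled by the next call
 * once the caller advances the iterator past the span just issued.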
3341 */ 3342 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 3343 size_t max_segs, unsigned int *_nsegs) 3344 { 3345 const struct bio_vec *bvecs = iter->bvec; 3346 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 3347 size_t len, span = 0, n = iter->count; 3348 size_t skip = iter->iov_offset; 3349 3350 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 3351 return 0; 3352 3353 while (n && ix < nbv && skip) { 3354 len = bvecs[ix].bv_len; 3355 if (skip < len) 3356 break; 3357 skip -= len; 3358 n -= len; 3359 ix++; 3360 } 3361 3362 while (n && ix < nbv) { 3363 len = min3(n, bvecs[ix].bv_len - skip, max_size); 3364 span += len; 3365 max_size -= len; 3366 nsegs++; 3367 ix++; 3368 if (max_size == 0 || nsegs >= max_segs) 3369 break; 3370 skip = 0; 3371 n -= len; 3372 } 3373 3374 *_nsegs = nsegs; 3375 return span; 3376 } 3377 3378 static int 3379 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 3380 struct cifsFileInfo *open_file, 3381 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 3382 struct cifs_aio_ctx *ctx) 3383 { 3384 int rc = 0; 3385 size_t cur_len, max_len; 3386 struct cifs_writedata *wdata; 3387 pid_t pid; 3388 struct TCP_Server_Info *server; 3389 unsigned int xid, max_segs = INT_MAX; 3390 3391 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3392 pid = open_file->pid; 3393 else 3394 pid = current->tgid; 3395 3396 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3397 xid = get_xid(); 3398 3399 #ifdef CONFIG_CIFS_SMB_DIRECT 3400 if (server->smbd_conn) 3401 max_segs = server->smbd_conn->max_frmr_depth; 3402 #endif 3403 3404 do { 3405 struct cifs_credits credits_on_stack; 3406 struct cifs_credits *credits = &credits_on_stack; 3407 unsigned int wsize, nsegs = 0; 3408 3409 if (signal_pending(current)) { 3410 rc = -EINTR; 3411 break; 3412 } 3413 3414 if (open_file->invalidHandle) { 3415 rc = cifs_reopen_file(open_file, false); 3416 if (rc == -EAGAIN) 3417 continue; 3418 else if (rc) 3419 break; 3420 } 3421 3422 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 3423 &wsize, credits); 3424 if (rc) 3425 break; 3426 3427 max_len = min_t(const size_t, len, wsize); 3428 if (!max_len) { 3429 rc = -EAGAIN; 3430 add_credits_and_wake_if(server, credits, 0); 3431 break; 3432 } 3433 3434 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); 3435 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3436 cur_len, max_len, nsegs, from->nr_segs, max_segs); 3437 if (cur_len == 0) { 3438 rc = -EIO; 3439 add_credits_and_wake_if(server, credits, 0); 3440 break; 3441 } 3442 3443 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); 3444 if (!wdata) { 3445 rc = -ENOMEM; 3446 add_credits_and_wake_if(server, credits, 0); 3447 break; 3448 } 3449 3450 wdata->sync_mode = WB_SYNC_ALL; 3451 wdata->offset = (__u64)fpos; 3452 wdata->cfile = cifsFileInfo_get(open_file); 3453 wdata->server = server; 3454 wdata->pid = pid; 3455 wdata->bytes = cur_len; 3456 wdata->credits = credits_on_stack; 3457 wdata->iter = *from; 3458 wdata->ctx = ctx; 3459 kref_get(&ctx->refcount); 3460 3461 iov_iter_truncate(&wdata->iter, cur_len); 3462 3463 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3464 3465 if (!rc) { 3466 if (wdata->cfile->invalidHandle) 3467 rc = -EAGAIN; 3468 else 3469 rc = server->ops->async_writev(wdata, 3470 cifs_uncached_writedata_release); 3471 } 3472 3473 if (rc) { 3474 add_credits_and_wake_if(server, &wdata->credits, 0); 3475 kref_put(&wdata->refcount, 3476 
cifs_uncached_writedata_release); 3477 if (rc == -EAGAIN) 3478 continue; 3479 break; 3480 } 3481 3482 list_add_tail(&wdata->list, wdata_list); 3483 iov_iter_advance(from, cur_len); 3484 fpos += cur_len; 3485 len -= cur_len; 3486 } while (len > 0); 3487 3488 free_xid(xid); 3489 return rc; 3490 } 3491 3492 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) 3493 { 3494 struct cifs_writedata *wdata, *tmp; 3495 struct cifs_tcon *tcon; 3496 struct cifs_sb_info *cifs_sb; 3497 struct dentry *dentry = ctx->cfile->dentry; 3498 ssize_t rc; 3499 3500 tcon = tlink_tcon(ctx->cfile->tlink); 3501 cifs_sb = CIFS_SB(dentry->d_sb); 3502 3503 mutex_lock(&ctx->aio_mutex); 3504 3505 if (list_empty(&ctx->list)) { 3506 mutex_unlock(&ctx->aio_mutex); 3507 return; 3508 } 3509 3510 rc = ctx->rc; 3511 /* 3512 * Wait for and collect replies for any successful sends in order of 3513 * increasing offset. Once an error is hit, then return without waiting 3514 * for any more replies. 3515 */ 3516 restart_loop: 3517 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { 3518 if (!rc) { 3519 if (!try_wait_for_completion(&wdata->done)) { 3520 mutex_unlock(&ctx->aio_mutex); 3521 return; 3522 } 3523 3524 if (wdata->result) 3525 rc = wdata->result; 3526 else 3527 ctx->total_len += wdata->bytes; 3528 3529 /* resend call if it's a retryable error */ 3530 if (rc == -EAGAIN) { 3531 struct list_head tmp_list; 3532 struct iov_iter tmp_from = ctx->iter; 3533 3534 INIT_LIST_HEAD(&tmp_list); 3535 list_del_init(&wdata->list); 3536 3537 if (ctx->direct_io) 3538 rc = cifs_resend_wdata( 3539 wdata, &tmp_list, ctx); 3540 else { 3541 iov_iter_advance(&tmp_from, 3542 wdata->offset - ctx->pos); 3543 3544 rc = cifs_write_from_iter(wdata->offset, 3545 wdata->bytes, &tmp_from, 3546 ctx->cfile, cifs_sb, &tmp_list, 3547 ctx); 3548 3549 kref_put(&wdata->refcount, 3550 cifs_uncached_writedata_release); 3551 } 3552 3553 list_splice(&tmp_list, &ctx->list); 3554 goto restart_loop; 3555 } 3556 } 3557 list_del_init(&wdata->list); 3558 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3559 } 3560 3561 cifs_stats_bytes_written(tcon, ctx->total_len); 3562 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 3563 3564 ctx->rc = (rc == 0) ? 
ctx->total_len : rc; 3565 3566 mutex_unlock(&ctx->aio_mutex); 3567 3568 if (ctx->iocb && ctx->iocb->ki_complete) 3569 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3570 else 3571 complete(&ctx->done); 3572 } 3573 3574 static ssize_t __cifs_writev( 3575 struct kiocb *iocb, struct iov_iter *from, bool direct) 3576 { 3577 struct file *file = iocb->ki_filp; 3578 ssize_t total_written = 0; 3579 struct cifsFileInfo *cfile; 3580 struct cifs_tcon *tcon; 3581 struct cifs_sb_info *cifs_sb; 3582 struct cifs_aio_ctx *ctx; 3583 int rc; 3584 3585 rc = generic_write_checks(iocb, from); 3586 if (rc <= 0) 3587 return rc; 3588 3589 cifs_sb = CIFS_FILE_SB(file); 3590 cfile = file->private_data; 3591 tcon = tlink_tcon(cfile->tlink); 3592 3593 if (!tcon->ses->server->ops->async_writev) 3594 return -ENOSYS; 3595 3596 ctx = cifs_aio_ctx_alloc(); 3597 if (!ctx) 3598 return -ENOMEM; 3599 3600 ctx->cfile = cifsFileInfo_get(cfile); 3601 3602 if (!is_sync_kiocb(iocb)) 3603 ctx->iocb = iocb; 3604 3605 ctx->pos = iocb->ki_pos; 3606 ctx->direct_io = direct; 3607 ctx->nr_pinned_pages = 0; 3608 3609 if (user_backed_iter(from)) { 3610 /* 3611 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3612 * they contain references to the calling process's virtual 3613 * memory layout which won't be available in an async worker 3614 * thread. This also takes a pin on every folio involved. 3615 */ 3616 rc = netfs_extract_user_iter(from, iov_iter_count(from), 3617 &ctx->iter, 0); 3618 if (rc < 0) { 3619 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3620 return rc; 3621 } 3622 3623 ctx->nr_pinned_pages = rc; 3624 ctx->bv = (void *)ctx->iter.bvec; 3625 ctx->bv_need_unpin = iov_iter_extract_will_pin(from); 3626 } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && 3627 !is_sync_kiocb(iocb)) { 3628 /* 3629 * If the op is asynchronous, we need to copy the list attached 3630 * to a BVEC/KVEC-type iterator, but we assume that the storage 3631 * will be pinned by the caller; in any case, we may or may not 3632 * be able to pin the pages, so we don't try. 3633 */ 3634 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); 3635 if (!ctx->bv) { 3636 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3637 return -ENOMEM; 3638 } 3639 } else { 3640 /* 3641 * Otherwise, we just pass the iterator down as-is and rely on 3642 * the caller to make sure the pages referred to by the 3643 * iterator don't evaporate. 3644 */ 3645 ctx->iter = *from; 3646 } 3647 3648 ctx->len = iov_iter_count(&ctx->iter); 3649 3650 /* grab a lock here due to read response handlers can access ctx */ 3651 mutex_lock(&ctx->aio_mutex); 3652 3653 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, 3654 cfile, cifs_sb, &ctx->list, ctx); 3655 3656 /* 3657 * If at least one write was successfully sent, then discard any rc 3658 * value from the later writes. If the other write succeeds, then 3659 * we'll end up returning whatever was written. If it fails, then 3660 * we'll get a new rc value from that. 
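 *
 * Illustrative note (not from the original source): once something has
 * been queued, the final status is produced by the aio context instead.
 * For a synchronous kiocb the code below waits on ctx->done and returns
 * the number of bytes written (or the error if nothing was written); for
 * an async kiocb it returns -EIOCBQUEUED and the result is delivered later
 * through ctx->iocb->ki_complete() from collect_uncached_write_data().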
3661 */ 3662 if (!list_empty(&ctx->list)) 3663 rc = 0; 3664 3665 mutex_unlock(&ctx->aio_mutex); 3666 3667 if (rc) { 3668 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3669 return rc; 3670 } 3671 3672 if (!is_sync_kiocb(iocb)) { 3673 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3674 return -EIOCBQUEUED; 3675 } 3676 3677 rc = wait_for_completion_killable(&ctx->done); 3678 if (rc) { 3679 mutex_lock(&ctx->aio_mutex); 3680 ctx->rc = rc = -EINTR; 3681 total_written = ctx->total_len; 3682 mutex_unlock(&ctx->aio_mutex); 3683 } else { 3684 rc = ctx->rc; 3685 total_written = ctx->total_len; 3686 } 3687 3688 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3689 3690 if (unlikely(!total_written)) 3691 return rc; 3692 3693 iocb->ki_pos += total_written; 3694 return total_written; 3695 } 3696 3697 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) 3698 { 3699 struct file *file = iocb->ki_filp; 3700 3701 cifs_revalidate_mapping(file->f_inode); 3702 return __cifs_writev(iocb, from, true); 3703 } 3704 3705 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 3706 { 3707 return __cifs_writev(iocb, from, false); 3708 } 3709 3710 static ssize_t 3711 cifs_writev(struct kiocb *iocb, struct iov_iter *from) 3712 { 3713 struct file *file = iocb->ki_filp; 3714 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 3715 struct inode *inode = file->f_mapping->host; 3716 struct cifsInodeInfo *cinode = CIFS_I(inode); 3717 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 3718 ssize_t rc; 3719 3720 inode_lock(inode); 3721 /* 3722 * We need to hold the sem to be sure nobody modifies lock list 3723 * with a brlock that prevents writing. 3724 */ 3725 down_read(&cinode->lock_sem); 3726 3727 rc = generic_write_checks(iocb, from); 3728 if (rc <= 0) 3729 goto out; 3730 3731 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), 3732 server->vals->exclusive_lock_type, 0, 3733 NULL, CIFS_WRITE_OP)) 3734 rc = __generic_file_write_iter(iocb, from); 3735 else 3736 rc = -EACCES; 3737 out: 3738 up_read(&cinode->lock_sem); 3739 inode_unlock(inode); 3740 3741 if (rc > 0) 3742 rc = generic_write_sync(iocb, rc); 3743 return rc; 3744 } 3745 3746 ssize_t 3747 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) 3748 { 3749 struct inode *inode = file_inode(iocb->ki_filp); 3750 struct cifsInodeInfo *cinode = CIFS_I(inode); 3751 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3752 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 3753 iocb->ki_filp->private_data; 3754 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3755 ssize_t written; 3756 3757 written = cifs_get_writer(cinode); 3758 if (written) 3759 return written; 3760 3761 if (CIFS_CACHE_WRITE(cinode)) { 3762 if (cap_unix(tcon->ses) && 3763 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 3764 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 3765 written = generic_file_write_iter(iocb, from); 3766 goto out; 3767 } 3768 written = cifs_writev(iocb, from); 3769 goto out; 3770 } 3771 /* 3772 * For non-oplocked files in strict cache mode we need to write the data 3773 * to the server exactly from the pos to pos+len-1 rather than flush all 3774 * affected pages because it may cause an error with mandatory locks on 3775 * these pages but not on the region from pos to pos+len-1.
3776 */ 3777 written = cifs_user_writev(iocb, from); 3778 if (CIFS_CACHE_READ(cinode)) { 3779 /* 3780 * We have read level caching and we have just sent a write 3781 * request to the server thus making data in the cache stale. 3782 * Zap the cache and set oplock/lease level to NONE to avoid 3783 * reading stale data from the cache. All subsequent read 3784 * operations will read new data from the server. 3785 */ 3786 cifs_zap_mapping(inode); 3787 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", 3788 inode); 3789 cinode->oplock = 0; 3790 } 3791 out: 3792 cifs_put_writer(cinode); 3793 return written; 3794 } 3795 3796 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) 3797 { 3798 struct cifs_readdata *rdata; 3799 3800 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); 3801 if (rdata) { 3802 kref_init(&rdata->refcount); 3803 INIT_LIST_HEAD(&rdata->list); 3804 init_completion(&rdata->done); 3805 INIT_WORK(&rdata->work, complete); 3806 } 3807 3808 return rdata; 3809 } 3810 3811 void 3812 cifs_readdata_release(struct kref *refcount) 3813 { 3814 struct cifs_readdata *rdata = container_of(refcount, 3815 struct cifs_readdata, refcount); 3816 3817 if (rdata->ctx) 3818 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); 3819 #ifdef CONFIG_CIFS_SMB_DIRECT 3820 if (rdata->mr) { 3821 smbd_deregister_mr(rdata->mr); 3822 rdata->mr = NULL; 3823 } 3824 #endif 3825 if (rdata->cfile) 3826 cifsFileInfo_put(rdata->cfile); 3827 3828 kfree(rdata); 3829 } 3830 3831 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); 3832 3833 static void 3834 cifs_uncached_readv_complete(struct work_struct *work) 3835 { 3836 struct cifs_readdata *rdata = container_of(work, 3837 struct cifs_readdata, work); 3838 3839 complete(&rdata->done); 3840 collect_uncached_read_data(rdata->ctx); 3841 /* the below call can possibly free the last ref to aio ctx */ 3842 kref_put(&rdata->refcount, cifs_readdata_release); 3843 } 3844 3845 static int cifs_resend_rdata(struct cifs_readdata *rdata, 3846 struct list_head *rdata_list, 3847 struct cifs_aio_ctx *ctx) 3848 { 3849 unsigned int rsize; 3850 struct cifs_credits credits; 3851 int rc; 3852 struct TCP_Server_Info *server; 3853 3854 /* XXX: should we pick a new channel here? */ 3855 server = rdata->server; 3856 3857 do { 3858 if (rdata->cfile->invalidHandle) { 3859 rc = cifs_reopen_file(rdata->cfile, true); 3860 if (rc == -EAGAIN) 3861 continue; 3862 else if (rc) 3863 break; 3864 } 3865 3866 /* 3867 * Wait for credits to resend this rdata. 
3868 * Note: we are attempting to resend the whole rdata not in 3869 * segments 3870 */ 3871 do { 3872 rc = server->ops->wait_mtu_credits(server, rdata->bytes, 3873 &rsize, &credits); 3874 3875 if (rc) 3876 goto fail; 3877 3878 if (rsize < rdata->bytes) { 3879 add_credits_and_wake_if(server, &credits, 0); 3880 msleep(1000); 3881 } 3882 } while (rsize < rdata->bytes); 3883 rdata->credits = credits; 3884 3885 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 3886 if (!rc) { 3887 if (rdata->cfile->invalidHandle) 3888 rc = -EAGAIN; 3889 else { 3890 #ifdef CONFIG_CIFS_SMB_DIRECT 3891 if (rdata->mr) { 3892 rdata->mr->need_invalidate = true; 3893 smbd_deregister_mr(rdata->mr); 3894 rdata->mr = NULL; 3895 } 3896 #endif 3897 rc = server->ops->async_readv(rdata); 3898 } 3899 } 3900 3901 /* If the read was successfully sent, we are done */ 3902 if (!rc) { 3903 /* Add to aio pending list */ 3904 list_add_tail(&rdata->list, rdata_list); 3905 return 0; 3906 } 3907 3908 /* Roll back credits and retry if needed */ 3909 add_credits_and_wake_if(server, &rdata->credits, 0); 3910 } while (rc == -EAGAIN); 3911 3912 fail: 3913 kref_put(&rdata->refcount, cifs_readdata_release); 3914 return rc; 3915 } 3916 3917 static int 3918 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file, 3919 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, 3920 struct cifs_aio_ctx *ctx) 3921 { 3922 struct cifs_readdata *rdata; 3923 unsigned int rsize, nsegs, max_segs = INT_MAX; 3924 struct cifs_credits credits_on_stack; 3925 struct cifs_credits *credits = &credits_on_stack; 3926 size_t cur_len, max_len; 3927 int rc; 3928 pid_t pid; 3929 struct TCP_Server_Info *server; 3930 3931 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3932 3933 #ifdef CONFIG_CIFS_SMB_DIRECT 3934 if (server->smbd_conn) 3935 max_segs = server->smbd_conn->max_frmr_depth; 3936 #endif 3937 3938 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3939 pid = open_file->pid; 3940 else 3941 pid = current->tgid; 3942 3943 do { 3944 if (open_file->invalidHandle) { 3945 rc = cifs_reopen_file(open_file, true); 3946 if (rc == -EAGAIN) 3947 continue; 3948 else if (rc) 3949 break; 3950 } 3951 3952 if (cifs_sb->ctx->rsize == 0) 3953 cifs_sb->ctx->rsize = 3954 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 3955 cifs_sb->ctx); 3956 3957 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 3958 &rsize, credits); 3959 if (rc) 3960 break; 3961 3962 max_len = min_t(size_t, len, rsize); 3963 3964 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, 3965 max_segs, &nsegs); 3966 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3967 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); 3968 if (cur_len == 0) { 3969 rc = -EIO; 3970 add_credits_and_wake_if(server, credits, 0); 3971 break; 3972 } 3973 3974 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); 3975 if (!rdata) { 3976 add_credits_and_wake_if(server, credits, 0); 3977 rc = -ENOMEM; 3978 break; 3979 } 3980 3981 rdata->server = server; 3982 rdata->cfile = cifsFileInfo_get(open_file); 3983 rdata->offset = fpos; 3984 rdata->bytes = cur_len; 3985 rdata->pid = pid; 3986 rdata->credits = credits_on_stack; 3987 rdata->ctx = ctx; 3988 kref_get(&ctx->refcount); 3989 3990 rdata->iter = ctx->iter; 3991 iov_iter_truncate(&rdata->iter, cur_len); 3992 3993 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 3994 3995 if (!rc) { 3996 if (rdata->cfile->invalidHandle) 3997 rc = -EAGAIN; 3998 else 3999 rc = 
server->ops->async_readv(rdata); 4000 } 4001 4002 if (rc) { 4003 add_credits_and_wake_if(server, &rdata->credits, 0); 4004 kref_put(&rdata->refcount, cifs_readdata_release); 4005 if (rc == -EAGAIN) 4006 continue; 4007 break; 4008 } 4009 4010 list_add_tail(&rdata->list, rdata_list); 4011 iov_iter_advance(&ctx->iter, cur_len); 4012 fpos += cur_len; 4013 len -= cur_len; 4014 } while (len > 0); 4015 4016 return rc; 4017 } 4018 4019 static void 4020 collect_uncached_read_data(struct cifs_aio_ctx *ctx) 4021 { 4022 struct cifs_readdata *rdata, *tmp; 4023 struct cifs_sb_info *cifs_sb; 4024 int rc; 4025 4026 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); 4027 4028 mutex_lock(&ctx->aio_mutex); 4029 4030 if (list_empty(&ctx->list)) { 4031 mutex_unlock(&ctx->aio_mutex); 4032 return; 4033 } 4034 4035 rc = ctx->rc; 4036 /* the loop below should proceed in the order of increasing offsets */ 4037 again: 4038 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { 4039 if (!rc) { 4040 if (!try_wait_for_completion(&rdata->done)) { 4041 mutex_unlock(&ctx->aio_mutex); 4042 return; 4043 } 4044 4045 if (rdata->result == -EAGAIN) { 4046 /* resend call if it's a retryable error */ 4047 struct list_head tmp_list; 4048 unsigned int got_bytes = rdata->got_bytes; 4049 4050 list_del_init(&rdata->list); 4051 INIT_LIST_HEAD(&tmp_list); 4052 4053 if (ctx->direct_io) { 4054 /* 4055 * Re-use rdata as this is a 4056 * direct I/O 4057 */ 4058 rc = cifs_resend_rdata( 4059 rdata, 4060 &tmp_list, ctx); 4061 } else { 4062 rc = cifs_send_async_read( 4063 rdata->offset + got_bytes, 4064 rdata->bytes - got_bytes, 4065 rdata->cfile, cifs_sb, 4066 &tmp_list, ctx); 4067 4068 kref_put(&rdata->refcount, 4069 cifs_readdata_release); 4070 } 4071 4072 list_splice(&tmp_list, &ctx->list); 4073 4074 goto again; 4075 } else if (rdata->result) 4076 rc = rdata->result; 4077 4078 /* if there was a short read -- discard anything left */ 4079 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) 4080 rc = -ENODATA; 4081 4082 ctx->total_len += rdata->got_bytes; 4083 } 4084 list_del_init(&rdata->list); 4085 kref_put(&rdata->refcount, cifs_readdata_release); 4086 } 4087 4088 /* mask nodata case */ 4089 if (rc == -ENODATA) 4090 rc = 0; 4091 4092 ctx->rc = (rc == 0) ? 
(ssize_t)ctx->total_len : rc; 4093 4094 mutex_unlock(&ctx->aio_mutex); 4095 4096 if (ctx->iocb && ctx->iocb->ki_complete) 4097 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 4098 else 4099 complete(&ctx->done); 4100 } 4101 4102 static ssize_t __cifs_readv( 4103 struct kiocb *iocb, struct iov_iter *to, bool direct) 4104 { 4105 size_t len; 4106 struct file *file = iocb->ki_filp; 4107 struct cifs_sb_info *cifs_sb; 4108 struct cifsFileInfo *cfile; 4109 struct cifs_tcon *tcon; 4110 ssize_t rc, total_read = 0; 4111 loff_t offset = iocb->ki_pos; 4112 struct cifs_aio_ctx *ctx; 4113 4114 len = iov_iter_count(to); 4115 if (!len) 4116 return 0; 4117 4118 cifs_sb = CIFS_FILE_SB(file); 4119 cfile = file->private_data; 4120 tcon = tlink_tcon(cfile->tlink); 4121 4122 if (!tcon->ses->server->ops->async_readv) 4123 return -ENOSYS; 4124 4125 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4126 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4127 4128 ctx = cifs_aio_ctx_alloc(); 4129 if (!ctx) 4130 return -ENOMEM; 4131 4132 ctx->pos = offset; 4133 ctx->direct_io = direct; 4134 ctx->len = len; 4135 ctx->cfile = cifsFileInfo_get(cfile); 4136 ctx->nr_pinned_pages = 0; 4137 4138 if (!is_sync_kiocb(iocb)) 4139 ctx->iocb = iocb; 4140 4141 if (user_backed_iter(to)) { 4142 /* 4143 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 4144 * they contain references to the calling process's virtual 4145 * memory layout which won't be available in an async worker 4146 * thread. This also takes a pin on every folio involved. 4147 */ 4148 rc = netfs_extract_user_iter(to, iov_iter_count(to), 4149 &ctx->iter, 0); 4150 if (rc < 0) { 4151 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4152 return rc; 4153 } 4154 4155 ctx->nr_pinned_pages = rc; 4156 ctx->bv = (void *)ctx->iter.bvec; 4157 ctx->bv_need_unpin = iov_iter_extract_will_pin(to); 4158 ctx->should_dirty = true; 4159 } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && 4160 !is_sync_kiocb(iocb)) { 4161 /* 4162 * If the op is asynchronous, we need to copy the list attached 4163 * to a BVEC/KVEC-type iterator, but we assume that the storage 4164 * will be retained by the caller; in any case, we may or may 4165 * not be able to pin the pages, so we don't try. 4166 */ 4167 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); 4168 if (!ctx->bv) { 4169 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4170 return -ENOMEM; 4171 } 4172 } else { 4173 /* 4174 * Otherwise, we just pass the iterator down as-is and rely on 4175 * the caller to make sure the pages referred to by the 4176 * iterator don't evaporate. 
4177 */ 4178 ctx->iter = *to; 4179 } 4180 4181 if (direct) { 4182 rc = filemap_write_and_wait_range(file->f_inode->i_mapping, 4183 offset, offset + len - 1); 4184 if (rc) { 4185 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4186 return -EAGAIN; 4187 } 4188 } 4189 4190 /* grab a lock here because read response handlers can access ctx */ 4191 mutex_lock(&ctx->aio_mutex); 4192 4193 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx); 4194 4195 /* if at least one read request was sent successfully, then reset rc */ 4196 if (!list_empty(&ctx->list)) 4197 rc = 0; 4198 4199 mutex_unlock(&ctx->aio_mutex); 4200 4201 if (rc) { 4202 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4203 return rc; 4204 } 4205 4206 if (!is_sync_kiocb(iocb)) { 4207 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4208 return -EIOCBQUEUED; 4209 } 4210 4211 rc = wait_for_completion_killable(&ctx->done); 4212 if (rc) { 4213 mutex_lock(&ctx->aio_mutex); 4214 ctx->rc = rc = -EINTR; 4215 total_read = ctx->total_len; 4216 mutex_unlock(&ctx->aio_mutex); 4217 } else { 4218 rc = ctx->rc; 4219 total_read = ctx->total_len; 4220 } 4221 4222 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4223 4224 if (total_read) { 4225 iocb->ki_pos += total_read; 4226 return total_read; 4227 } 4228 return rc; 4229 } 4230 4231 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) 4232 { 4233 return __cifs_readv(iocb, to, true); 4234 } 4235 4236 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) 4237 { 4238 return __cifs_readv(iocb, to, false); 4239 } 4240 4241 ssize_t 4242 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) 4243 { 4244 struct inode *inode = file_inode(iocb->ki_filp); 4245 struct cifsInodeInfo *cinode = CIFS_I(inode); 4246 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 4247 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 4248 iocb->ki_filp->private_data; 4249 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 4250 int rc = -EACCES; 4251 4252 /* 4253 * In strict cache mode we need to read from the server all the time 4254 * if we don't have level II oplock because the server can delay mtime 4255 * change - so we can't make a decision about inode invalidating. 4256 * And we can also fail when reading pages if there are mandatory locks 4257 * on pages affected by this read but not on the region from pos to 4258 * pos+len-1. 4259 */ 4260 if (!CIFS_CACHE_READ(cinode)) 4261 return cifs_user_readv(iocb, to); 4262 4263 if (cap_unix(tcon->ses) && 4264 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 4265 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 4266 return generic_file_read_iter(iocb, to); 4267 4268 /* 4269 * We need to hold the sem to be sure nobody modifies the lock list 4270 * with a brlock that prevents reading.
4271 */ 4272 down_read(&cinode->lock_sem); 4273 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), 4274 tcon->ses->server->vals->shared_lock_type, 4275 0, NULL, CIFS_READ_OP)) 4276 rc = generic_file_read_iter(iocb, to); 4277 up_read(&cinode->lock_sem); 4278 return rc; 4279 } 4280 4281 static ssize_t 4282 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) 4283 { 4284 int rc = -EACCES; 4285 unsigned int bytes_read = 0; 4286 unsigned int total_read; 4287 unsigned int current_read_size; 4288 unsigned int rsize; 4289 struct cifs_sb_info *cifs_sb; 4290 struct cifs_tcon *tcon; 4291 struct TCP_Server_Info *server; 4292 unsigned int xid; 4293 char *cur_offset; 4294 struct cifsFileInfo *open_file; 4295 struct cifs_io_parms io_parms = {0}; 4296 int buf_type = CIFS_NO_BUFFER; 4297 __u32 pid; 4298 4299 xid = get_xid(); 4300 cifs_sb = CIFS_FILE_SB(file); 4301 4302 /* FIXME: set up handlers for larger reads and/or convert to async */ 4303 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); 4304 4305 if (file->private_data == NULL) { 4306 rc = -EBADF; 4307 free_xid(xid); 4308 return rc; 4309 } 4310 open_file = file->private_data; 4311 tcon = tlink_tcon(open_file->tlink); 4312 server = cifs_pick_channel(tcon->ses); 4313 4314 if (!server->ops->sync_read) { 4315 free_xid(xid); 4316 return -ENOSYS; 4317 } 4318 4319 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4320 pid = open_file->pid; 4321 else 4322 pid = current->tgid; 4323 4324 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4325 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4326 4327 for (total_read = 0, cur_offset = read_data; read_size > total_read; 4328 total_read += bytes_read, cur_offset += bytes_read) { 4329 do { 4330 current_read_size = min_t(uint, read_size - total_read, 4331 rsize); 4332 /* 4333 * For windows me and 9x we do not want to request more 4334 * than it negotiated since it will refuse the read 4335 * then. 4336 */ 4337 if (!(tcon->ses->capabilities & 4338 tcon->ses->server->vals->cap_large_files)) { 4339 current_read_size = min_t(uint, 4340 current_read_size, CIFSMaxBufSize); 4341 } 4342 if (open_file->invalidHandle) { 4343 rc = cifs_reopen_file(open_file, true); 4344 if (rc != 0) 4345 break; 4346 } 4347 io_parms.pid = pid; 4348 io_parms.tcon = tcon; 4349 io_parms.offset = *offset; 4350 io_parms.length = current_read_size; 4351 io_parms.server = server; 4352 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 4353 &bytes_read, &cur_offset, 4354 &buf_type); 4355 } while (rc == -EAGAIN); 4356 4357 if (rc || (bytes_read == 0)) { 4358 if (total_read) { 4359 break; 4360 } else { 4361 free_xid(xid); 4362 return rc; 4363 } 4364 } else { 4365 cifs_stats_bytes_read(tcon, total_read); 4366 *offset += bytes_read; 4367 } 4368 } 4369 free_xid(xid); 4370 return total_read; 4371 } 4372 4373 /* 4374 * If the page is mmap'ed into a process' page tables, then we need to make 4375 * sure that it doesn't change while being written back. 4376 */ 4377 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 4378 { 4379 struct folio *folio = page_folio(vmf->page); 4380 4381 /* Wait for the folio to be written to the cache before we allow it to 4382 * be modified. We then assume the entire folio will need writing back. 
4383 */ 4384 #ifdef CONFIG_CIFS_FSCACHE 4385 if (folio_test_fscache(folio) && 4386 folio_wait_fscache_killable(folio) < 0) 4387 return VM_FAULT_RETRY; 4388 #endif 4389 4390 folio_wait_writeback(folio); 4391 4392 if (folio_lock_killable(folio) < 0) 4393 return VM_FAULT_RETRY; 4394 return VM_FAULT_LOCKED; 4395 } 4396 4397 static const struct vm_operations_struct cifs_file_vm_ops = { 4398 .fault = filemap_fault, 4399 .map_pages = filemap_map_pages, 4400 .page_mkwrite = cifs_page_mkwrite, 4401 }; 4402 4403 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 4404 { 4405 int xid, rc = 0; 4406 struct inode *inode = file_inode(file); 4407 4408 xid = get_xid(); 4409 4410 if (!CIFS_CACHE_READ(CIFS_I(inode))) 4411 rc = cifs_zap_mapping(inode); 4412 if (!rc) 4413 rc = generic_file_mmap(file, vma); 4414 if (!rc) 4415 vma->vm_ops = &cifs_file_vm_ops; 4416 4417 free_xid(xid); 4418 return rc; 4419 } 4420 4421 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) 4422 { 4423 int rc, xid; 4424 4425 xid = get_xid(); 4426 4427 rc = cifs_revalidate_file(file); 4428 if (rc) 4429 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", 4430 rc); 4431 if (!rc) 4432 rc = generic_file_mmap(file, vma); 4433 if (!rc) 4434 vma->vm_ops = &cifs_file_vm_ops; 4435 4436 free_xid(xid); 4437 return rc; 4438 } 4439 4440 /* 4441 * Unlock a bunch of folios in the pagecache. 4442 */ 4443 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last) 4444 { 4445 struct folio *folio; 4446 XA_STATE(xas, &mapping->i_pages, first); 4447 4448 rcu_read_lock(); 4449 xas_for_each(&xas, folio, last) { 4450 folio_unlock(folio); 4451 } 4452 rcu_read_unlock(); 4453 } 4454 4455 static void cifs_readahead_complete(struct work_struct *work) 4456 { 4457 struct cifs_readdata *rdata = container_of(work, 4458 struct cifs_readdata, work); 4459 struct folio *folio; 4460 pgoff_t last; 4461 bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes); 4462 4463 XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE); 4464 4465 if (good) 4466 cifs_readahead_to_fscache(rdata->mapping->host, 4467 rdata->offset, rdata->bytes); 4468 4469 if (iov_iter_count(&rdata->iter) > 0) 4470 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter); 4471 4472 last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE; 4473 4474 rcu_read_lock(); 4475 xas_for_each(&xas, folio, last) { 4476 if (good) { 4477 flush_dcache_folio(folio); 4478 folio_mark_uptodate(folio); 4479 } 4480 folio_unlock(folio); 4481 } 4482 rcu_read_unlock(); 4483 4484 kref_put(&rdata->refcount, cifs_readdata_release); 4485 } 4486 4487 static void cifs_readahead(struct readahead_control *ractl) 4488 { 4489 struct cifsFileInfo *open_file = ractl->file->private_data; 4490 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file); 4491 struct TCP_Server_Info *server; 4492 unsigned int xid, nr_pages, cache_nr_pages = 0; 4493 unsigned int ra_pages; 4494 pgoff_t next_cached = ULONG_MAX, ra_index; 4495 bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) && 4496 cifs_inode_cookie(ractl->mapping->host)->cache_priv; 4497 bool check_cache = caching; 4498 pid_t pid; 4499 int rc = 0; 4500 4501 /* Note that readahead_count() lags behind our dequeuing of pages from 4502 the ractl, so we have to keep track for ourselves.
4503 */ 4504 ra_pages = readahead_count(ractl); 4505 ra_index = readahead_index(ractl); 4506 4507 xid = get_xid(); 4508 4509 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4510 pid = open_file->pid; 4511 else 4512 pid = current->tgid; 4513 4514 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4515 4516 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 4517 __func__, ractl->file, ractl->mapping, ra_pages); 4518 4519 /* 4520 * Chop the readahead request up into rsize-sized read requests. 4521 */ 4522 while ((nr_pages = ra_pages)) { 4523 unsigned int i, rsize; 4524 struct cifs_readdata *rdata; 4525 struct cifs_credits credits_on_stack; 4526 struct cifs_credits *credits = &credits_on_stack; 4527 struct folio *folio; 4528 pgoff_t fsize; 4529 4530 /* 4531 * Find out if we have anything cached in the range of 4532 * interest, and if so, where the next chunk of cached data is. 4533 */ 4534 if (caching) { 4535 if (check_cache) { 4536 rc = cifs_fscache_query_occupancy( 4537 ractl->mapping->host, ra_index, nr_pages, 4538 &next_cached, &cache_nr_pages); 4539 if (rc < 0) 4540 caching = false; 4541 check_cache = false; 4542 } 4543 4544 if (ra_index == next_cached) { 4545 /* 4546 * TODO: Send a whole batch of pages to be read 4547 * by the cache. 4548 */ 4549 folio = readahead_folio(ractl); 4550 fsize = folio_nr_pages(folio); 4551 ra_pages -= fsize; 4552 ra_index += fsize; 4553 if (cifs_readpage_from_fscache(ractl->mapping->host, 4554 &folio->page) < 0) { 4555 /* 4556 * TODO: Deal with cache read failure 4557 * here, but for the moment, delegate 4558 * that to readpage. 4559 */ 4560 caching = false; 4561 } 4562 folio_unlock(folio); 4563 next_cached += fsize; 4564 cache_nr_pages -= fsize; 4565 if (cache_nr_pages == 0) 4566 check_cache = true; 4567 continue; 4568 } 4569 } 4570 4571 if (open_file->invalidHandle) { 4572 rc = cifs_reopen_file(open_file, true); 4573 if (rc) { 4574 if (rc == -EAGAIN) 4575 continue; 4576 break; 4577 } 4578 } 4579 4580 if (cifs_sb->ctx->rsize == 0) 4581 cifs_sb->ctx->rsize = 4582 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4583 cifs_sb->ctx); 4584 4585 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4586 &rsize, credits); 4587 if (rc) 4588 break; 4589 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); 4590 if (next_cached != ULONG_MAX) 4591 nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); 4592 4593 /* 4594 * Give up immediately if rsize is too small to read an entire 4595 * page. The VFS will fall back to readpage. We should never 4596 * reach this point however since we set ra_pages to 0 when the 4597 * rsize is smaller than a cache page. 
4598 */ 4599 if (unlikely(!nr_pages)) { 4600 add_credits_and_wake_if(server, credits, 0); 4601 break; 4602 } 4603 4604 rdata = cifs_readdata_alloc(cifs_readahead_complete); 4605 if (!rdata) { 4606 /* best to give up if we're out of mem */ 4607 add_credits_and_wake_if(server, credits, 0); 4608 break; 4609 } 4610 4611 rdata->offset = ra_index * PAGE_SIZE; 4612 rdata->bytes = nr_pages * PAGE_SIZE; 4613 rdata->cfile = cifsFileInfo_get(open_file); 4614 rdata->server = server; 4615 rdata->mapping = ractl->mapping; 4616 rdata->pid = pid; 4617 rdata->credits = credits_on_stack; 4618 4619 for (i = 0; i < nr_pages; i++) { 4620 if (!readahead_folio(ractl)) 4621 WARN_ON(1); 4622 } 4623 ra_pages -= nr_pages; 4624 ra_index += nr_pages; 4625 4626 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, 4627 rdata->offset, rdata->bytes); 4628 4629 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4630 if (!rc) { 4631 if (rdata->cfile->invalidHandle) 4632 rc = -EAGAIN; 4633 else 4634 rc = server->ops->async_readv(rdata); 4635 } 4636 4637 if (rc) { 4638 add_credits_and_wake_if(server, &rdata->credits, 0); 4639 cifs_unlock_folios(rdata->mapping, 4640 rdata->offset / PAGE_SIZE, 4641 (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); 4642 /* Fallback to the readpage in error/reconnect cases */ 4643 kref_put(&rdata->refcount, cifs_readdata_release); 4644 break; 4645 } 4646 4647 kref_put(&rdata->refcount, cifs_readdata_release); 4648 } 4649 4650 free_xid(xid); 4651 } 4652 4653 /* 4654 * cifs_readpage_worker must be called with the page pinned 4655 */ 4656 static int cifs_readpage_worker(struct file *file, struct page *page, 4657 loff_t *poffset) 4658 { 4659 struct inode *inode = file_inode(file); 4660 struct timespec64 atime, mtime; 4661 char *read_data; 4662 int rc; 4663 4664 /* Is the page cached? 
*/ 4665 rc = cifs_readpage_from_fscache(inode, page); 4666 if (rc == 0) 4667 goto read_complete; 4668 4669 read_data = kmap(page); 4670 /* for reads over a certain size could initiate async read ahead */ 4671 4672 rc = cifs_read(file, read_data, PAGE_SIZE, poffset); 4673 4674 if (rc < 0) 4675 goto io_error; 4676 else 4677 cifs_dbg(FYI, "Bytes read %d\n", rc); 4678 4679 /* we do not want atime to be less than mtime, it broke some apps */ 4680 atime = inode_set_atime_to_ts(inode, current_time(inode)); 4681 mtime = inode_get_mtime(inode); 4682 if (timespec64_compare(&atime, &mtime) < 0) 4683 inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 4684 4685 if (PAGE_SIZE > rc) 4686 memset(read_data + rc, 0, PAGE_SIZE - rc); 4687 4688 flush_dcache_page(page); 4689 SetPageUptodate(page); 4690 rc = 0; 4691 4692 io_error: 4693 kunmap(page); 4694 4695 read_complete: 4696 unlock_page(page); 4697 return rc; 4698 } 4699 4700 static int cifs_read_folio(struct file *file, struct folio *folio) 4701 { 4702 struct page *page = &folio->page; 4703 loff_t offset = page_file_offset(page); 4704 int rc = -EACCES; 4705 unsigned int xid; 4706 4707 xid = get_xid(); 4708 4709 if (file->private_data == NULL) { 4710 rc = -EBADF; 4711 free_xid(xid); 4712 return rc; 4713 } 4714 4715 cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", 4716 page, (int)offset, (int)offset); 4717 4718 rc = cifs_readpage_worker(file, page, &offset); 4719 4720 free_xid(xid); 4721 return rc; 4722 } 4723 4724 static int is_inode_writable(struct cifsInodeInfo *cifs_inode) 4725 { 4726 struct cifsFileInfo *open_file; 4727 4728 spin_lock(&cifs_inode->open_file_lock); 4729 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 4730 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 4731 spin_unlock(&cifs_inode->open_file_lock); 4732 return 1; 4733 } 4734 } 4735 spin_unlock(&cifs_inode->open_file_lock); 4736 return 0; 4737 } 4738 4739 /* We do not want to update the file size from server for inodes 4740 open for write - to avoid races with writepage extending 4741 the file - in the future we could consider allowing 4742 refreshing the inode only on increases in the file size 4743 but this is tricky to do without racing with writebehind 4744 page caching in the current Linux kernel design */ 4745 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) 4746 { 4747 if (!cifsInode) 4748 return true; 4749 4750 if (is_inode_writable(cifsInode)) { 4751 /* This inode is open for write at least once */ 4752 struct cifs_sb_info *cifs_sb; 4753 4754 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); 4755 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 4756 /* since no page cache to corrupt on directio 4757 we can change size safely */ 4758 return true; 4759 } 4760 4761 if (i_size_read(&cifsInode->netfs.inode) < end_of_file) 4762 return true; 4763 4764 return false; 4765 } else 4766 return true; 4767 } 4768 4769 static int cifs_write_begin(struct file *file, struct address_space *mapping, 4770 loff_t pos, unsigned len, 4771 struct page **pagep, void **fsdata) 4772 { 4773 int oncethru = 0; 4774 pgoff_t index = pos >> PAGE_SHIFT; 4775 loff_t offset = pos & (PAGE_SIZE - 1); 4776 loff_t page_start = pos & PAGE_MASK; 4777 loff_t i_size; 4778 struct page *page; 4779 int rc = 0; 4780 4781 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); 4782 4783 start: 4784 page = grab_cache_page_write_begin(mapping, index); 4785 if (!page) { 4786 rc = -ENOMEM; 4787 goto out; 4788 } 4789 4790 if (PageUptodate(page)) 4791 goto 
out; 4792 4793 /* 4794 * If we write a full page it will be up to date, no need to read from 4795 * the server. If the write is short, we'll end up doing a sync write 4796 * instead. 4797 */ 4798 if (len == PAGE_SIZE) 4799 goto out; 4800 4801 /* 4802 * optimize away the read when we have an oplock, and we're not 4803 * expecting to use any of the data we'd be reading in. That 4804 * is, when the page lies beyond the EOF, or straddles the EOF 4805 * and the write will cover all of the existing data. 4806 */ 4807 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { 4808 i_size = i_size_read(mapping->host); 4809 if (page_start >= i_size || 4810 (offset == 0 && (pos + len) >= i_size)) { 4811 zero_user_segments(page, 0, offset, 4812 offset + len, 4813 PAGE_SIZE); 4814 /* 4815 * PageChecked means that the parts of the page 4816 * to which we're not writing are considered up 4817 * to date. Once the data is copied to the 4818 * page, it can be set uptodate. 4819 */ 4820 SetPageChecked(page); 4821 goto out; 4822 } 4823 } 4824 4825 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { 4826 /* 4827 * might as well read a page, it is fast enough. If we get 4828 * an error, we don't need to return it. cifs_write_end will 4829 * do a sync write instead since PG_uptodate isn't set. 4830 */ 4831 cifs_readpage_worker(file, page, &page_start); 4832 put_page(page); 4833 oncethru = 1; 4834 goto start; 4835 } else { 4836 /* we could try using another file handle if there is one - 4837 but how would we lock it to prevent close of that handle 4838 racing with this read? In any case 4839 this will be written out by write_end so is fine */ 4840 } 4841 out: 4842 *pagep = page; 4843 return rc; 4844 } 4845 4846 static bool cifs_release_folio(struct folio *folio, gfp_t gfp) 4847 { 4848 if (folio_test_private(folio)) 4849 return 0; 4850 if (folio_test_fscache(folio)) { 4851 if (current_is_kswapd() || !(gfp & __GFP_FS)) 4852 return false; 4853 folio_wait_fscache(folio); 4854 } 4855 fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); 4856 return true; 4857 } 4858 4859 static void cifs_invalidate_folio(struct folio *folio, size_t offset, 4860 size_t length) 4861 { 4862 folio_wait_fscache(folio); 4863 } 4864 4865 static int cifs_launder_folio(struct folio *folio) 4866 { 4867 int rc = 0; 4868 loff_t range_start = folio_pos(folio); 4869 loff_t range_end = range_start + folio_size(folio); 4870 struct writeback_control wbc = { 4871 .sync_mode = WB_SYNC_ALL, 4872 .nr_to_write = 0, 4873 .range_start = range_start, 4874 .range_end = range_end, 4875 }; 4876 4877 cifs_dbg(FYI, "Launder page: %lu\n", folio->index); 4878 4879 if (folio_clear_dirty_for_io(folio)) 4880 rc = cifs_writepage_locked(&folio->page, &wbc); 4881 4882 folio_wait_fscache(folio); 4883 return rc; 4884 } 4885 4886 void cifs_oplock_break(struct work_struct *work) 4887 { 4888 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 4889 oplock_break); 4890 struct inode *inode = d_inode(cfile->dentry); 4891 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 4892 struct cifsInodeInfo *cinode = CIFS_I(inode); 4893 struct cifs_tcon *tcon; 4894 struct TCP_Server_Info *server; 4895 struct tcon_link *tlink; 4896 int rc = 0; 4897 bool purge_cache = false, oplock_break_cancelled; 4898 __u64 persistent_fid, volatile_fid; 4899 __u16 net_fid; 4900 4901 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, 4902 TASK_UNINTERRUPTIBLE); 4903 4904 tlink = cifs_sb_tlink(cifs_sb); 4905 if (IS_ERR(tlink)) 4906 goto out; 4907 tcon = tlink_tcon(tlink); 4908 
server = tcon->ses->server; 4909 4910 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level, 4911 cfile->oplock_epoch, &purge_cache); 4912 4913 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && 4914 cifs_has_mand_locks(cinode)) { 4915 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", 4916 inode); 4917 cinode->oplock = 0; 4918 } 4919 4920 if (inode && S_ISREG(inode->i_mode)) { 4921 if (CIFS_CACHE_READ(cinode)) 4922 break_lease(inode, O_RDONLY); 4923 else 4924 break_lease(inode, O_WRONLY); 4925 rc = filemap_fdatawrite(inode->i_mapping); 4926 if (!CIFS_CACHE_READ(cinode) || purge_cache) { 4927 rc = filemap_fdatawait(inode->i_mapping); 4928 mapping_set_error(inode->i_mapping, rc); 4929 cifs_zap_mapping(inode); 4930 } 4931 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc); 4932 if (CIFS_CACHE_WRITE(cinode)) 4933 goto oplock_break_ack; 4934 } 4935 4936 rc = cifs_push_locks(cfile); 4937 if (rc) 4938 cifs_dbg(VFS, "Push locks rc = %d\n", rc); 4939 4940 oplock_break_ack: 4941 /* 4942 * When an oplock break is received and there are no active 4943 * file handles, only cached ones, schedule the deferred close 4944 * immediately so that a new open will not use the cached handle. 4945 */ 4946 4947 if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes)) 4948 cifs_close_deferred_file(cinode); 4949 4950 persistent_fid = cfile->fid.persistent_fid; 4951 volatile_fid = cfile->fid.volatile_fid; 4952 net_fid = cfile->fid.netfid; 4953 oplock_break_cancelled = cfile->oplock_break_cancelled; 4954 4955 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); 4956 /* 4957 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require 4958 * an acknowledgment to be sent when the file has already been closed. 4959 */ 4960 spin_lock(&cinode->open_file_lock); 4961 /* check that the list is empty since this can race with kill_sb calling tree disconnect */ 4962 if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) { 4963 spin_unlock(&cinode->open_file_lock); 4964 rc = server->ops->oplock_response(tcon, persistent_fid, 4965 volatile_fid, net_fid, cinode); 4966 cifs_dbg(FYI, "Oplock release rc = %d\n", rc); 4967 } else 4968 spin_unlock(&cinode->open_file_lock); 4969 4970 cifs_put_tlink(tlink); 4971 out: 4972 cifs_done_oplock_break(cinode); 4973 } 4974 4975 /* 4976 * The presence of cifs_direct_io() in the address space ops vector 4977 * allows open() O_DIRECT flags which would have failed otherwise. 4978 * 4979 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests 4980 * so this method should never be called. 4981 * 4982 * Direct IO is not yet supported in the cached mode.
4983 */ 4984 static ssize_t 4985 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) 4986 { 4987 /* 4988 * FIXME 4989 * Eventually need to support direct IO for non forcedirectio mounts 4990 */ 4991 return -EINVAL; 4992 } 4993 4994 static int cifs_swap_activate(struct swap_info_struct *sis, 4995 struct file *swap_file, sector_t *span) 4996 { 4997 struct cifsFileInfo *cfile = swap_file->private_data; 4998 struct inode *inode = swap_file->f_mapping->host; 4999 unsigned long blocks; 5000 long long isize; 5001 5002 cifs_dbg(FYI, "swap activate\n"); 5003 5004 if (!swap_file->f_mapping->a_ops->swap_rw) 5005 /* Cannot support swap */ 5006 return -EINVAL; 5007 5008 spin_lock(&inode->i_lock); 5009 blocks = inode->i_blocks; 5010 isize = inode->i_size; 5011 spin_unlock(&inode->i_lock); 5012 if (blocks*512 < isize) { 5013 pr_warn("swap activate: swapfile has holes\n"); 5014 return -EINVAL; 5015 } 5016 *span = sis->pages; 5017 5018 pr_warn_once("Swap support over SMB3 is experimental\n"); 5019 5020 /* 5021 * TODO: consider adding ACL (or documenting how) to prevent other 5022 * users (on this or other systems) from reading it 5023 */ 5024 5025 5026 /* TODO: add sk_set_memalloc(inet) or similar */ 5027 5028 if (cfile) 5029 cfile->swapfile = true; 5030 /* 5031 * TODO: Since file already open, we can't open with DENY_ALL here 5032 * but we could add call to grab a byte range lock to prevent others 5033 * from reading or writing the file 5034 */ 5035 5036 sis->flags |= SWP_FS_OPS; 5037 return add_swap_extent(sis, 0, sis->max, 0); 5038 } 5039 5040 static void cifs_swap_deactivate(struct file *file) 5041 { 5042 struct cifsFileInfo *cfile = file->private_data; 5043 5044 cifs_dbg(FYI, "swap deactivate\n"); 5045 5046 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ 5047 5048 if (cfile) 5049 cfile->swapfile = false; 5050 5051 /* do we need to unpin (or unlock) the file */ 5052 } 5053 5054 const struct address_space_operations cifs_addr_ops = { 5055 .read_folio = cifs_read_folio, 5056 .readahead = cifs_readahead, 5057 .writepages = cifs_writepages, 5058 .write_begin = cifs_write_begin, 5059 .write_end = cifs_write_end, 5060 .dirty_folio = netfs_dirty_folio, 5061 .release_folio = cifs_release_folio, 5062 .direct_IO = cifs_direct_io, 5063 .invalidate_folio = cifs_invalidate_folio, 5064 .launder_folio = cifs_launder_folio, 5065 .migrate_folio = filemap_migrate_folio, 5066 /* 5067 * TODO: investigate and if useful we could add an is_dirty_writeback 5068 * helper if needed 5069 */ 5070 .swap_activate = cifs_swap_activate, 5071 .swap_deactivate = cifs_swap_deactivate, 5072 }; 5073 5074 /* 5075 * cifs_readahead requires the server to support a buffer large enough to 5076 * contain the header plus one complete page of data. Otherwise, we need 5077 * to leave cifs_readahead out of the address space operations. 5078 */ 5079 const struct address_space_operations cifs_addr_ops_smallbuf = { 5080 .read_folio = cifs_read_folio, 5081 .writepages = cifs_writepages, 5082 .write_begin = cifs_write_begin, 5083 .write_end = cifs_write_end, 5084 .dirty_folio = netfs_dirty_folio, 5085 .release_folio = cifs_release_folio, 5086 .invalidate_folio = cifs_invalidate_folio, 5087 .launder_folio = cifs_launder_folio, 5088 .migrate_folio = filemap_migrate_folio, 5089 }; 5090
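/*
 * Illustrative sketch (not part of the original file): the comment above
 * explains that cifs_addr_ops_smallbuf is used when the server buffer cannot
 * hold a header plus one full page.  One plausible way the right operations
 * table gets attached to an inode is shown below; the helper name
 * cifs_set_inode_aops_sketch() and the max_read comparison are assumptions
 * modelled on the client's inode initialisation (cifs_set_ops() in inode.c),
 * not a verbatim copy of it.
 */
#if 0
static void cifs_set_inode_aops_sketch(struct inode *inode)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	/* check whether the server can support readahead of a full page */
	if (cifs_sb_master_tcon(cifs_sb)->ses->server->max_read <
	    PAGE_SIZE + MAX_CIFS_HDR_SIZE)
		inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
	else
		inode->i_data.a_ops = &cifs_addr_ops;
}
#endif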