// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
		if (xas_retry(&xas, folio))
			continue;
		xas_pause(&xas);
		rcu_read_unlock();
		folio_lock(folio);
		folio_clear_dirty_for_io(folio);
		folio_unlock(folio);
		rcu_read_lock();
	}

	rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_detach_private(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_set_error(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
 */
void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		filemap_dirty_folio(folio->mapping, folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Mark all open files on tree connections as invalid, since they
 * were closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file = NULL;
	struct list_head *tmp;
	struct list_head *tmp1;

	/* only send once per connect */
	spin_lock(&tcon->tc_lock);
	if (tcon->need_reconnect)
		tcon->status = TID_NEED_RECON;

	if (tcon->status != TID_NEED_RECON) {
		spin_unlock(&tcon->tc_lock);
		return;
	}
	tcon->status = TID_IN_FILES_INVALIDATE;
	spin_unlock(&tcon->tc_lock);

	/* list all files open on tree connection and mark them invalid */
	spin_lock(&tcon->open_file_lock);
	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		open_file->invalidHandle = true;
		open_file->oplock_break_cancelled = true;
	}
	spin_unlock(&tcon->open_file_lock);

	invalidate_all_cached_dirs(tcon);
	spin_lock(&tcon->tc_lock);
	if (tcon->status == TID_IN_FILES_INVALIDATE)
		tcon->status = TID_NEED_TCON;
	spin_unlock(&tcon->tc_lock);

	/*
	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
	 * to this tcon.
	 */
}

static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it can cause
		   unnecessary access-denied errors on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}
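
/*
 * e.g. an O_WRONLY open with fscache enabled (rdwr_for_fscache == 1) asks
 * for GENERIC_READ | GENERIC_WRITE so that cached data can be filled in
 * around partial writes, while a plain O_RDONLY open asks only for
 * GENERIC_READ.
 */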

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	if (!server->ops->open)
		return -ENOSYS;

	/* If we're caching, we need to be able to fill in around partial writes. */
	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
		rdwr_for_fscache = 1;

	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

retry_open:
	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = fid,
	};

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc) {
		if (rc == -EACCES && rdwr_for_fscache == 1) {
			desired_access = cifs_convert_flags(f_flags, 0);
			rdwr_for_fscache = 2;
			goto retry_open;
		}
		return rc;
	}
	if (rdwr_for_fscache == 2)
		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

	return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);
void serverclose_work(struct work_struct *work);

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
				       struct tcon_link *tlink, __u32 oplock,
				       const char *symlink_target)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	if (symlink_target) {
		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
		if (!cfile->symlink_target) {
			kfree(fdlocks);
			kfree(cfile);
			return NULL;
		}
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->deferred_close_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_WORK(&cfile->serverclose, serverclose_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file->symlink_target);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

void serverclose_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, serverclose);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	int retries = 0;
	int MAX_RETRIES = 4;

	do {
		if (server->ops->close_getattr)
			rc = server->ops->close_getattr(0, tcon, cifs_file);
		else if (server->ops->close)
			rc = server->ops->close(0, tcon, &cifs_file->fid);

		if (rc == -EBUSY || rc == -EAGAIN) {
			retries++;
			msleep(250);
		}
	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES));

	if (retries == MAX_RETRIES)
		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);

	if (cifs_file->offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_for_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:	not offloaded on close and oplock breaks
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	bool oplock_break_cancelled;
	bool serverclose_offloaded = false;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);

	cifs_file->offload = offload;
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on the
		 * last close because it may cause an error when we open this
		 * file again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;
		int rc = 0;

		xid = get_xid();
		if (server->ops->close_getattr)
			rc = server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			rc = server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);

		if (rc == -EBUSY || rc == -EAGAIN) {
			// Server close failed, hence offloading it as an async op
			queue_work(serverclose_wq, &cifs_file->serverclose);
			serverclose_offloaded = true;
		}
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	// if serverclose has been offloaded to wq (on failure), it will
	// handle offloading put as well. If serverclose not offloaded,
	// we need to handle offloading put here.
	if (!serverclose_offloaded) {
		if (offload)
			queue_work(fileinfo_put_wq, &cifs_file->put);
		else
			cifsFileInfo_put_final(cifs_file);
	}
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	struct cifs_open_info_data data = {};

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			file->private_data = cfile;
			spin_lock(&CIFS_I(inode)->deferred_lock);
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto use_cache;
		} else {
			_cifsFileInfo_put(cfile, true, false);
		}
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				    le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			   (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
				  xid, &data);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
			   file->f_mode & FMODE_WRITE);
	if (!(file->f_flags & O_DIRECT))
		goto out;
	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
		goto out;
	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	cifs_free_open_info(&data);
	return rc;
}
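
/*
 * e.g. if the same path is reopened with identical f_flags before the
 * deferred close timer fires, cifs_get_readable_path() above returns the
 * still-open handle, the deferred close is cancelled and no new open is
 * sent to the server (the "use_cache" path).
 */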

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return 0;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
	     le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
			~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* If we're caching, we need to be able to fill in around partial writes. */
	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
		rdwr_for_fscache = 1;

	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

retry_open:
	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = &cfile->fid,
		.reconnect = true,
	};

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}
	if (rc == -EACCES && rdwr_for_fscache == 1) {
		desired_access = cifs_convert_flags(cfile->f_flags, 0);
		rdwr_for_fscache = 2;
		goto retry_open;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

	if (rdwr_for_fscache == 2)
		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions) {
			rc = smb311_posix_get_inode_info(&inode, full_path,
							 NULL, inode->i_sb, xid);
		} else if (tcon->unix_ext) {
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		} else {
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
		}
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	free_dentry_path(page);
	free_xid(xid);
	return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work,
			struct cifsFileInfo, deferred.work);

	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	cifs_del_deferred_close(cfile);
	cfile->deferred_close_scheduled = false;
	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	_cifsFileInfo_put(cfile, true, false);
}

static bool
smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsInodeInfo *cinode = CIFS_I(inode);

	return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
			(cinode->oplock == CIFS_CACHE_RHW_FLG ||
			 cinode->oplock == CIFS_CACHE_RH_FLG) &&
			!test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
}

int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cfile->status_file_deleted == false) &&
		    (smb2_can_defer_close(inode, dclose))) {
			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
				inode_set_mtime_to_ts(inode,
						      inode_set_ctime_current(inode));
			}
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_close_scheduled &&
			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there is no pending work, mod_delayed_work queues new work.
				 * So, increase the ref count to avoid use-after-free.
				 */
				if (!mod_delayed_work(deferredclose_wq,
						      &cfile->deferred, cifs_sb->ctx->closetimeo))
					cifsFileInfo_get(cfile);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						   &cfile->deferred, cifs_sb->ctx->closetimeo);
				cfile->deferred_close_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file, *tmp;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		      current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->c.flc_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->c.flc_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->c.flc_type = F_RDLCK;
		else
			flock->c.flc_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->c.flc_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->c.flc_type;

	if ((flock->c.flc_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) {
		flock->c.flc_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->c.flc_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = locks_inode_context(inode);
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	for_each_file_lock(flock, &flctx->flc_posix) {
		unsigned char ftype = flock->c.flc_type;

		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = cifs_flock_len(flock);
		if (ftype == F_RDLCK || ftype == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->c.flc_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->c.flc_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->c.flc_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->c.flc_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->c.flc_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->c.flc_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->c.flc_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n",
			 flock->c.flc_flags);

	*type = server->vals->large_lock_type;
	if (lock_is_write(flock)) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (lock_is_unlock(flock)) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (lock_is_read(flock)) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->c.flc_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->c.flc_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = cifs_flock_len(flock);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->c.flc_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->c.flc_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->c.flc_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type | server->vals->shared_lock_type, 0, 1, false);
		flock->c.flc_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->c.flc_type = F_WRLCK;

	return 0;
}
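
/*
 * e.g. an F_GETLK request that is not resolved locally is probed on the
 * server by briefly taking and then releasing the requested range (first
 * with the exclusive type, then with the shared type), and the result is
 * reported back to the caller via flock->c.flc_type above.
 */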
__u64 length = cifs_flock_len(flock); 1912 struct list_head tmp_llist; 1913 1914 INIT_LIST_HEAD(&tmp_llist); 1915 1916 /* 1917 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1918 * and check it before using. 1919 */ 1920 max_buf = tcon->ses->server->maxBuf; 1921 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) 1922 return -EINVAL; 1923 1924 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1925 PAGE_SIZE); 1926 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1927 PAGE_SIZE); 1928 max_num = (max_buf - sizeof(struct smb_hdr)) / 1929 sizeof(LOCKING_ANDX_RANGE); 1930 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1931 if (!buf) 1932 return -ENOMEM; 1933 1934 cifs_down_write(&cinode->lock_sem); 1935 for (i = 0; i < 2; i++) { 1936 cur = buf; 1937 num = 0; 1938 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1939 if (flock->fl_start > li->offset || 1940 (flock->fl_start + length) < 1941 (li->offset + li->length)) 1942 continue; 1943 if (current->tgid != li->pid) 1944 continue; 1945 if (types[i] != li->type) 1946 continue; 1947 if (cinode->can_cache_brlcks) { 1948 /* 1949 * We can cache brlock requests - simply remove 1950 * a lock from the file's list. 1951 */ 1952 list_del(&li->llist); 1953 cifs_del_lock_waiters(li); 1954 kfree(li); 1955 continue; 1956 } 1957 cur->Pid = cpu_to_le16(li->pid); 1958 cur->LengthLow = cpu_to_le32((u32)li->length); 1959 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1960 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1961 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1962 /* 1963 * We need to save a lock here to let us add it again to 1964 * the file's list if the unlock range request fails on 1965 * the server. 1966 */ 1967 list_move(&li->llist, &tmp_llist); 1968 if (++num == max_num) { 1969 stored_rc = cifs_lockv(xid, tcon, 1970 cfile->fid.netfid, 1971 li->type, num, 0, buf); 1972 if (stored_rc) { 1973 /* 1974 * We failed on the unlock range 1975 * request - add all locks from the tmp 1976 * list to the head of the file's list. 1977 */ 1978 cifs_move_llist(&tmp_llist, 1979 &cfile->llist->locks); 1980 rc = stored_rc; 1981 } else 1982 /* 1983 * The unlock range request succeed - 1984 * free the tmp list. 
1985 */ 1986 cifs_free_llist(&tmp_llist); 1987 cur = buf; 1988 num = 0; 1989 } else 1990 cur++; 1991 } 1992 if (num) { 1993 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1994 types[i], num, 0, buf); 1995 if (stored_rc) { 1996 cifs_move_llist(&tmp_llist, 1997 &cfile->llist->locks); 1998 rc = stored_rc; 1999 } else 2000 cifs_free_llist(&tmp_llist); 2001 } 2002 } 2003 2004 up_write(&cinode->lock_sem); 2005 kfree(buf); 2006 return rc; 2007 } 2008 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 2009 2010 static int 2011 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, 2012 bool wait_flag, bool posix_lck, int lock, int unlock, 2013 unsigned int xid) 2014 { 2015 int rc = 0; 2016 __u64 length = cifs_flock_len(flock); 2017 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2018 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2019 struct TCP_Server_Info *server = tcon->ses->server; 2020 struct inode *inode = d_inode(cfile->dentry); 2021 2022 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 2023 if (posix_lck) { 2024 int posix_lock_type; 2025 2026 rc = cifs_posix_lock_set(file, flock); 2027 if (rc <= FILE_LOCK_DEFERRED) 2028 return rc; 2029 2030 if (type & server->vals->shared_lock_type) 2031 posix_lock_type = CIFS_RDLCK; 2032 else 2033 posix_lock_type = CIFS_WRLCK; 2034 2035 if (unlock == 1) 2036 posix_lock_type = CIFS_UNLCK; 2037 2038 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, 2039 hash_lockowner(flock->c.flc_owner), 2040 flock->fl_start, length, 2041 NULL, posix_lock_type, wait_flag); 2042 goto out; 2043 } 2044 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 2045 if (lock) { 2046 struct cifsLockInfo *lock; 2047 2048 lock = cifs_lock_init(flock->fl_start, length, type, 2049 flock->c.flc_flags); 2050 if (!lock) 2051 return -ENOMEM; 2052 2053 rc = cifs_lock_add_if(cfile, lock, wait_flag); 2054 if (rc < 0) { 2055 kfree(lock); 2056 return rc; 2057 } 2058 if (!rc) 2059 goto out; 2060 2061 /* 2062 * Windows 7 server can delay breaking lease from read to None 2063 * if we set a byte-range lock on a file - break it explicitly 2064 * before sending the lock to the server to be sure the next 2065 * read won't conflict with non-overlapping locks due to 2066 * page reading.
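 * (Hence the code below zaps the page cache and clears the oplock before the
 * lock is sent to the server when we have read caching but not write caching.)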
2067 */ 2068 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && 2069 CIFS_CACHE_READ(CIFS_I(inode))) { 2070 cifs_zap_mapping(inode); 2071 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", 2072 inode); 2073 CIFS_I(inode)->oplock = 0; 2074 } 2075 2076 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 2077 type, 1, 0, wait_flag); 2078 if (rc) { 2079 kfree(lock); 2080 return rc; 2081 } 2082 2083 cifs_lock_add(cfile, lock); 2084 } else if (unlock) 2085 rc = server->ops->mand_unlock_range(cfile, flock, xid); 2086 2087 out: 2088 if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) { 2089 /* 2090 * If this is a request to remove all locks because we 2091 * are closing the file, it doesn't matter if the 2092 * unlocking failed as both cifs.ko and the SMB server 2093 * remove the lock on file close 2094 */ 2095 if (rc) { 2096 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); 2097 if (!(flock->c.flc_flags & FL_CLOSE)) 2098 return rc; 2099 } 2100 rc = locks_lock_file_wait(file, flock); 2101 } 2102 return rc; 2103 } 2104 2105 int cifs_flock(struct file *file, int cmd, struct file_lock *fl) 2106 { 2107 int rc, xid; 2108 int lock = 0, unlock = 0; 2109 bool wait_flag = false; 2110 bool posix_lck = false; 2111 struct cifs_sb_info *cifs_sb; 2112 struct cifs_tcon *tcon; 2113 struct cifsFileInfo *cfile; 2114 __u32 type; 2115 2116 xid = get_xid(); 2117 2118 if (!(fl->c.flc_flags & FL_FLOCK)) { 2119 rc = -ENOLCK; 2120 free_xid(xid); 2121 return rc; 2122 } 2123 2124 cfile = (struct cifsFileInfo *)file->private_data; 2125 tcon = tlink_tcon(cfile->tlink); 2126 2127 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, 2128 tcon->ses->server); 2129 cifs_sb = CIFS_FILE_SB(file); 2130 2131 if (cap_unix(tcon->ses) && 2132 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2133 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2134 posix_lck = true; 2135 2136 if (!lock && !unlock) { 2137 /* 2138 * if no lock or unlock then nothing to do since we do not 2139 * know what it is 2140 */ 2141 rc = -EOPNOTSUPP; 2142 free_xid(xid); 2143 return rc; 2144 } 2145 2146 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, 2147 xid); 2148 free_xid(xid); 2149 return rc; 2150 2151 2152 } 2153 2154 int cifs_lock(struct file *file, int cmd, struct file_lock *flock) 2155 { 2156 int rc, xid; 2157 int lock = 0, unlock = 0; 2158 bool wait_flag = false; 2159 bool posix_lck = false; 2160 struct cifs_sb_info *cifs_sb; 2161 struct cifs_tcon *tcon; 2162 struct cifsFileInfo *cfile; 2163 __u32 type; 2164 2165 rc = -EACCES; 2166 xid = get_xid(); 2167 2168 cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd, 2169 flock->c.flc_flags, flock->c.flc_type, 2170 (long long)flock->fl_start, 2171 (long long)flock->fl_end); 2172 2173 cfile = (struct cifsFileInfo *)file->private_data; 2174 tcon = tlink_tcon(cfile->tlink); 2175 2176 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, 2177 tcon->ses->server); 2178 cifs_sb = CIFS_FILE_SB(file); 2179 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); 2180 2181 if (cap_unix(tcon->ses) && 2182 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2183 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2184 posix_lck = true; 2185 /* 2186 * BB add code here to normalize offset and length to account for 2187 * negative length which we can not accept over the wire. 
2188 */ 2189 if (IS_GETLK(cmd)) { 2190 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); 2191 free_xid(xid); 2192 return rc; 2193 } 2194 2195 if (!lock && !unlock) { 2196 /* 2197 * if no lock or unlock then nothing to do since we do not 2198 * know what it is 2199 */ 2200 free_xid(xid); 2201 return -EOPNOTSUPP; 2202 } 2203 2204 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, 2205 xid); 2206 free_xid(xid); 2207 return rc; 2208 } 2209 2210 /* 2211 * update the file size (if needed) after a write. Should be called with 2212 * the inode->i_lock held 2213 */ 2214 void 2215 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 2216 unsigned int bytes_written) 2217 { 2218 loff_t end_of_write = offset + bytes_written; 2219 2220 if (end_of_write > cifsi->netfs.remote_i_size) 2221 netfs_resize_file(&cifsi->netfs, end_of_write, true); 2222 } 2223 2224 static ssize_t 2225 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, 2226 size_t write_size, loff_t *offset) 2227 { 2228 int rc = 0; 2229 unsigned int bytes_written = 0; 2230 unsigned int total_written; 2231 struct cifs_tcon *tcon; 2232 struct TCP_Server_Info *server; 2233 unsigned int xid; 2234 struct dentry *dentry = open_file->dentry; 2235 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); 2236 struct cifs_io_parms io_parms = {0}; 2237 2238 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", 2239 write_size, *offset, dentry); 2240 2241 tcon = tlink_tcon(open_file->tlink); 2242 server = tcon->ses->server; 2243 2244 if (!server->ops->sync_write) 2245 return -ENOSYS; 2246 2247 xid = get_xid(); 2248 2249 for (total_written = 0; write_size > total_written; 2250 total_written += bytes_written) { 2251 rc = -EAGAIN; 2252 while (rc == -EAGAIN) { 2253 struct kvec iov[2]; 2254 unsigned int len; 2255 2256 if (open_file->invalidHandle) { 2257 /* we could deadlock if we called 2258 filemap_fdatawait from here so tell 2259 reopen_file not to flush data to 2260 server now */ 2261 rc = cifs_reopen_file(open_file, false); 2262 if (rc != 0) 2263 break; 2264 } 2265 2266 len = min(server->ops->wp_retry_size(d_inode(dentry)), 2267 (unsigned int)write_size - total_written); 2268 /* iov[0] is reserved for smb header */ 2269 iov[1].iov_base = (char *)write_data + total_written; 2270 iov[1].iov_len = len; 2271 io_parms.pid = pid; 2272 io_parms.tcon = tcon; 2273 io_parms.offset = *offset; 2274 io_parms.length = len; 2275 rc = server->ops->sync_write(xid, &open_file->fid, 2276 &io_parms, &bytes_written, iov, 1); 2277 } 2278 if (rc || (bytes_written == 0)) { 2279 if (total_written) 2280 break; 2281 else { 2282 free_xid(xid); 2283 return rc; 2284 } 2285 } else { 2286 spin_lock(&d_inode(dentry)->i_lock); 2287 cifs_update_eof(cifsi, *offset, bytes_written); 2288 spin_unlock(&d_inode(dentry)->i_lock); 2289 *offset += bytes_written; 2290 } 2291 } 2292 2293 cifs_stats_bytes_written(tcon, total_written); 2294 2295 if (total_written > 0) { 2296 spin_lock(&d_inode(dentry)->i_lock); 2297 if (*offset > d_inode(dentry)->i_size) { 2298 i_size_write(d_inode(dentry), *offset); 2299 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9; 2300 } 2301 spin_unlock(&d_inode(dentry)->i_lock); 2302 } 2303 mark_inode_dirty_sync(d_inode(dentry)); 2304 free_xid(xid); 2305 return total_written; 2306 } 2307 2308 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, 2309 bool fsuid_only) 2310 { 2311 struct cifsFileInfo *open_file = NULL; 2312 struct cifs_sb_info *cifs_sb = 
CIFS_SB(cifs_inode->netfs.inode.i_sb); 2313 2314 /* only filter by fsuid on multiuser mounts */ 2315 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2316 fsuid_only = false; 2317 2318 spin_lock(&cifs_inode->open_file_lock); 2319 /* we could simply get the first_list_entry since write-only entries 2320 are always at the end of the list but since the first entry might 2321 have a close pending, we go through the whole list */ 2322 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2323 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2324 continue; 2325 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 2326 if ((!open_file->invalidHandle)) { 2327 /* found a good file */ 2328 /* lock it so it will not be closed on us */ 2329 cifsFileInfo_get(open_file); 2330 spin_unlock(&cifs_inode->open_file_lock); 2331 return open_file; 2332 } /* else might as well continue, and look for 2333 another, or simply have the caller reopen it 2334 again rather than trying to fix this handle */ 2335 } else /* write only file */ 2336 break; /* write only files are last so must be done */ 2337 } 2338 spin_unlock(&cifs_inode->open_file_lock); 2339 return NULL; 2340 } 2341 2342 /* Return -EBADF if no handle is found and general rc otherwise */ 2343 int 2344 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, 2345 struct cifsFileInfo **ret_file) 2346 { 2347 struct cifsFileInfo *open_file, *inv_file = NULL; 2348 struct cifs_sb_info *cifs_sb; 2349 bool any_available = false; 2350 int rc = -EBADF; 2351 unsigned int refind = 0; 2352 bool fsuid_only = flags & FIND_WR_FSUID_ONLY; 2353 bool with_delete = flags & FIND_WR_WITH_DELETE; 2354 *ret_file = NULL; 2355 2356 /* 2357 * Having a null inode here (because mapping->host was set to zero by 2358 * the VFS or MM) should not happen but we had reports of on oops (due 2359 * to it being zero) during stress testcases so we need to check for it 2360 */ 2361 2362 if (cifs_inode == NULL) { 2363 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n"); 2364 dump_stack(); 2365 return rc; 2366 } 2367 2368 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2369 2370 /* only filter by fsuid on multiuser mounts */ 2371 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2372 fsuid_only = false; 2373 2374 spin_lock(&cifs_inode->open_file_lock); 2375 refind_writable: 2376 if (refind > MAX_REOPEN_ATT) { 2377 spin_unlock(&cifs_inode->open_file_lock); 2378 return rc; 2379 } 2380 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2381 if (!any_available && open_file->pid != current->tgid) 2382 continue; 2383 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2384 continue; 2385 if (with_delete && !(open_file->fid.access & DELETE)) 2386 continue; 2387 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 2388 if (!open_file->invalidHandle) { 2389 /* found a good writable file */ 2390 cifsFileInfo_get(open_file); 2391 spin_unlock(&cifs_inode->open_file_lock); 2392 *ret_file = open_file; 2393 return 0; 2394 } else { 2395 if (!inv_file) 2396 inv_file = open_file; 2397 } 2398 } 2399 } 2400 /* couldn't find useable FH with same pid, try any available */ 2401 if (!any_available) { 2402 any_available = true; 2403 goto refind_writable; 2404 } 2405 2406 if (inv_file) { 2407 any_available = false; 2408 cifsFileInfo_get(inv_file); 2409 } 2410 2411 spin_unlock(&cifs_inode->open_file_lock); 2412 2413 if (inv_file) { 2414 rc = cifs_reopen_file(inv_file, false); 2415 if (!rc) { 2416 *ret_file = inv_file; 2417 return 0; 2418 } 2419 
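/* Reopen of the stale handle failed: requeue it at the tail of the open file list, drop our reference and retry the search. */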
2420 spin_lock(&cifs_inode->open_file_lock); 2421 list_move_tail(&inv_file->flist, &cifs_inode->openFileList); 2422 spin_unlock(&cifs_inode->open_file_lock); 2423 cifsFileInfo_put(inv_file); 2424 ++refind; 2425 inv_file = NULL; 2426 spin_lock(&cifs_inode->open_file_lock); 2427 goto refind_writable; 2428 } 2429 2430 return rc; 2431 } 2432 2433 struct cifsFileInfo * 2434 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) 2435 { 2436 struct cifsFileInfo *cfile; 2437 int rc; 2438 2439 rc = cifs_get_writable_file(cifs_inode, flags, &cfile); 2440 if (rc) 2441 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc); 2442 2443 return cfile; 2444 } 2445 2446 int 2447 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, 2448 int flags, 2449 struct cifsFileInfo **ret_file) 2450 { 2451 struct cifsFileInfo *cfile; 2452 void *page = alloc_dentry_path(); 2453 2454 *ret_file = NULL; 2455 2456 spin_lock(&tcon->open_file_lock); 2457 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2458 struct cifsInodeInfo *cinode; 2459 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2460 if (IS_ERR(full_path)) { 2461 spin_unlock(&tcon->open_file_lock); 2462 free_dentry_path(page); 2463 return PTR_ERR(full_path); 2464 } 2465 if (strcmp(full_path, name)) 2466 continue; 2467 2468 cinode = CIFS_I(d_inode(cfile->dentry)); 2469 spin_unlock(&tcon->open_file_lock); 2470 free_dentry_path(page); 2471 return cifs_get_writable_file(cinode, flags, ret_file); 2472 } 2473 2474 spin_unlock(&tcon->open_file_lock); 2475 free_dentry_path(page); 2476 return -ENOENT; 2477 } 2478 2479 int 2480 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, 2481 struct cifsFileInfo **ret_file) 2482 { 2483 struct cifsFileInfo *cfile; 2484 void *page = alloc_dentry_path(); 2485 2486 *ret_file = NULL; 2487 2488 spin_lock(&tcon->open_file_lock); 2489 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2490 struct cifsInodeInfo *cinode; 2491 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2492 if (IS_ERR(full_path)) { 2493 spin_unlock(&tcon->open_file_lock); 2494 free_dentry_path(page); 2495 return PTR_ERR(full_path); 2496 } 2497 if (strcmp(full_path, name)) 2498 continue; 2499 2500 cinode = CIFS_I(d_inode(cfile->dentry)); 2501 spin_unlock(&tcon->open_file_lock); 2502 free_dentry_path(page); 2503 *ret_file = find_readable_file(cinode, 0); 2504 return *ret_file ? 0 : -ENOENT; 2505 } 2506 2507 spin_unlock(&tcon->open_file_lock); 2508 free_dentry_path(page); 2509 return -ENOENT; 2510 } 2511 2512 void 2513 cifs_writedata_release(struct kref *refcount) 2514 { 2515 struct cifs_writedata *wdata = container_of(refcount, 2516 struct cifs_writedata, refcount); 2517 #ifdef CONFIG_CIFS_SMB_DIRECT 2518 if (wdata->mr) { 2519 smbd_deregister_mr(wdata->mr); 2520 wdata->mr = NULL; 2521 } 2522 #endif 2523 2524 if (wdata->cfile) 2525 cifsFileInfo_put(wdata->cfile); 2526 2527 kfree(wdata); 2528 } 2529 2530 /* 2531 * Write failed with a retryable error. Resend the write request. It's also 2532 * possible that the page was redirtied so re-clean the page. 
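 * The failed range is resent in chunks of at most wp_retry_size(); for
 * xarray-backed requests, pages that were redirtied in the meantime are
 * cleaned again via cifs_undirty_folios() before the resend.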
2533 */ 2534 static void 2535 cifs_writev_requeue(struct cifs_writedata *wdata) 2536 { 2537 int rc = 0; 2538 struct inode *inode = d_inode(wdata->cfile->dentry); 2539 struct TCP_Server_Info *server; 2540 unsigned int rest_len = wdata->bytes; 2541 loff_t fpos = wdata->offset; 2542 2543 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2544 do { 2545 struct cifs_writedata *wdata2; 2546 unsigned int wsize, cur_len; 2547 2548 wsize = server->ops->wp_retry_size(inode); 2549 if (wsize < rest_len) { 2550 if (wsize < PAGE_SIZE) { 2551 rc = -EOPNOTSUPP; 2552 break; 2553 } 2554 cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); 2555 } else { 2556 cur_len = rest_len; 2557 } 2558 2559 wdata2 = cifs_writedata_alloc(cifs_writev_complete); 2560 if (!wdata2) { 2561 rc = -ENOMEM; 2562 break; 2563 } 2564 2565 wdata2->sync_mode = wdata->sync_mode; 2566 wdata2->offset = fpos; 2567 wdata2->bytes = cur_len; 2568 wdata2->iter = wdata->iter; 2569 2570 iov_iter_advance(&wdata2->iter, fpos - wdata->offset); 2571 iov_iter_truncate(&wdata2->iter, wdata2->bytes); 2572 2573 if (iov_iter_is_xarray(&wdata2->iter)) 2574 /* Check for pages having been redirtied and clean 2575 * them. We can do this by walking the xarray. If 2576 * it's not an xarray, then it's a DIO and we shouldn't 2577 * be mucking around with the page bits. 2578 */ 2579 cifs_undirty_folios(inode, fpos, cur_len); 2580 2581 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, 2582 &wdata2->cfile); 2583 if (!wdata2->cfile) { 2584 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", 2585 rc); 2586 if (!is_retryable_error(rc)) 2587 rc = -EBADF; 2588 } else { 2589 wdata2->pid = wdata2->cfile->pid; 2590 rc = server->ops->async_writev(wdata2, 2591 cifs_writedata_release); 2592 } 2593 2594 kref_put(&wdata2->refcount, cifs_writedata_release); 2595 if (rc) { 2596 if (is_retryable_error(rc)) 2597 continue; 2598 fpos += cur_len; 2599 rest_len -= cur_len; 2600 break; 2601 } 2602 2603 fpos += cur_len; 2604 rest_len -= cur_len; 2605 } while (rest_len > 0); 2606 2607 /* Clean up remaining pages from the original wdata */ 2608 if (iov_iter_is_xarray(&wdata->iter)) 2609 cifs_pages_write_failed(inode, fpos, rest_len); 2610 2611 if (rc != 0 && !is_retryable_error(rc)) 2612 mapping_set_error(inode->i_mapping, rc); 2613 kref_put(&wdata->refcount, cifs_writedata_release); 2614 } 2615 2616 void 2617 cifs_writev_complete(struct work_struct *work) 2618 { 2619 struct cifs_writedata *wdata = container_of(work, 2620 struct cifs_writedata, work); 2621 struct inode *inode = d_inode(wdata->cfile->dentry); 2622 2623 if (wdata->result == 0) { 2624 spin_lock(&inode->i_lock); 2625 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2626 spin_unlock(&inode->i_lock); 2627 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2628 wdata->bytes); 2629 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2630 return cifs_writev_requeue(wdata); 2631 2632 if (wdata->result == -EAGAIN) 2633 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); 2634 else if (wdata->result < 0) 2635 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); 2636 else 2637 cifs_pages_written_back(inode, wdata->offset, wdata->bytes); 2638 2639 if (wdata->result != -EAGAIN) 2640 mapping_set_error(inode->i_mapping, wdata->result); 2641 kref_put(&wdata->refcount, cifs_writedata_release); 2642 } 2643 2644 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) 2645 { 2646 struct cifs_writedata *wdata; 2647 2648 wdata = kzalloc(sizeof(*wdata), 
GFP_NOFS); 2649 if (wdata != NULL) { 2650 kref_init(&wdata->refcount); 2651 INIT_LIST_HEAD(&wdata->list); 2652 init_completion(&wdata->done); 2653 INIT_WORK(&wdata->work, complete); 2654 } 2655 return wdata; 2656 } 2657 2658 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) 2659 { 2660 struct address_space *mapping = page->mapping; 2661 loff_t offset = (loff_t)page->index << PAGE_SHIFT; 2662 char *write_data; 2663 int rc = -EFAULT; 2664 int bytes_written = 0; 2665 struct inode *inode; 2666 struct cifsFileInfo *open_file; 2667 2668 if (!mapping || !mapping->host) 2669 return -EFAULT; 2670 2671 inode = page->mapping->host; 2672 2673 offset += (loff_t)from; 2674 write_data = kmap(page); 2675 write_data += from; 2676 2677 if ((to > PAGE_SIZE) || (from > to)) { 2678 kunmap(page); 2679 return -EIO; 2680 } 2681 2682 /* racing with truncate? */ 2683 if (offset > mapping->host->i_size) { 2684 kunmap(page); 2685 return 0; /* don't care */ 2686 } 2687 2688 /* check to make sure that we are not extending the file */ 2689 if (mapping->host->i_size - offset < (loff_t)to) 2690 to = (unsigned)(mapping->host->i_size - offset); 2691 2692 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, 2693 &open_file); 2694 if (!rc) { 2695 bytes_written = cifs_write(open_file, open_file->pid, 2696 write_data, to - from, &offset); 2697 cifsFileInfo_put(open_file); 2698 /* Does mm or vfs already set times? */ 2699 simple_inode_init_ts(inode); 2700 if ((bytes_written > 0) && (offset)) 2701 rc = 0; 2702 else if (bytes_written < 0) 2703 rc = bytes_written; 2704 else 2705 rc = -EFAULT; 2706 } else { 2707 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); 2708 if (!is_retryable_error(rc)) 2709 rc = -EIO; 2710 } 2711 2712 kunmap(page); 2713 return rc; 2714 } 2715 2716 /* 2717 * Extend the region to be written back to include subsequent contiguously 2718 * dirty pages if possible, but don't sleep while doing so. 2719 */ 2720 static void cifs_extend_writeback(struct address_space *mapping, 2721 struct xa_state *xas, 2722 long *_count, 2723 loff_t start, 2724 int max_pages, 2725 loff_t max_len, 2726 size_t *_len) 2727 { 2728 struct folio_batch batch; 2729 struct folio *folio; 2730 unsigned int nr_pages; 2731 pgoff_t index = (start + *_len) / PAGE_SIZE; 2732 size_t len; 2733 bool stop = true; 2734 unsigned int i; 2735 2736 folio_batch_init(&batch); 2737 2738 do { 2739 /* Firstly, we gather up a batch of contiguous dirty pages 2740 * under the RCU read lock - but we can't clear the dirty flags 2741 * there if any of those pages are mapped. 2742 */ 2743 rcu_read_lock(); 2744 2745 xas_for_each(xas, folio, ULONG_MAX) { 2746 stop = true; 2747 if (xas_retry(xas, folio)) 2748 continue; 2749 if (xa_is_value(folio)) 2750 break; 2751 if (folio->index != index) { 2752 xas_reset(xas); 2753 break; 2754 } 2755 2756 if (!folio_try_get_rcu(folio)) { 2757 xas_reset(xas); 2758 continue; 2759 } 2760 nr_pages = folio_nr_pages(folio); 2761 if (nr_pages > max_pages) { 2762 xas_reset(xas); 2763 break; 2764 } 2765 2766 /* Has the page moved or been split? 
*/ 2767 if (unlikely(folio != xas_reload(xas))) { 2768 folio_put(folio); 2769 xas_reset(xas); 2770 break; 2771 } 2772 2773 if (!folio_trylock(folio)) { 2774 folio_put(folio); 2775 xas_reset(xas); 2776 break; 2777 } 2778 if (!folio_test_dirty(folio) || 2779 folio_test_writeback(folio)) { 2780 folio_unlock(folio); 2781 folio_put(folio); 2782 xas_reset(xas); 2783 break; 2784 } 2785 2786 max_pages -= nr_pages; 2787 len = folio_size(folio); 2788 stop = false; 2789 2790 index += nr_pages; 2791 *_count -= nr_pages; 2792 *_len += len; 2793 if (max_pages <= 0 || *_len >= max_len || *_count <= 0) 2794 stop = true; 2795 2796 if (!folio_batch_add(&batch, folio)) 2797 break; 2798 if (stop) 2799 break; 2800 } 2801 2802 xas_pause(xas); 2803 rcu_read_unlock(); 2804 2805 /* Now, if we obtained any pages, we can shift them to being 2806 * writable and mark them for caching. 2807 */ 2808 if (!folio_batch_count(&batch)) 2809 break; 2810 2811 for (i = 0; i < folio_batch_count(&batch); i++) { 2812 folio = batch.folios[i]; 2813 /* The folio should be locked, dirty and not undergoing 2814 * writeback from the loop above. 2815 */ 2816 if (!folio_clear_dirty_for_io(folio)) 2817 WARN_ON(1); 2818 folio_start_writeback(folio); 2819 folio_unlock(folio); 2820 } 2821 2822 folio_batch_release(&batch); 2823 cond_resched(); 2824 } while (!stop); 2825 } 2826 2827 /* 2828 * Write back the locked page and any subsequent non-locked dirty pages. 2829 */ 2830 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, 2831 struct writeback_control *wbc, 2832 struct xa_state *xas, 2833 struct folio *folio, 2834 unsigned long long start, 2835 unsigned long long end) 2836 { 2837 struct inode *inode = mapping->host; 2838 struct TCP_Server_Info *server; 2839 struct cifs_writedata *wdata; 2840 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2841 struct cifs_credits credits_on_stack; 2842 struct cifs_credits *credits = &credits_on_stack; 2843 struct cifsFileInfo *cfile = NULL; 2844 unsigned long long i_size = i_size_read(inode), max_len; 2845 unsigned int xid, wsize; 2846 size_t len = folio_size(folio); 2847 long count = wbc->nr_to_write; 2848 int rc; 2849 2850 /* The folio should be locked, dirty and not undergoing writeback. */ 2851 if (!folio_clear_dirty_for_io(folio)) 2852 WARN_ON_ONCE(1); 2853 folio_start_writeback(folio); 2854 2855 count -= folio_nr_pages(folio); 2856 2857 xid = get_xid(); 2858 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); 2859 2860 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); 2861 if (rc) { 2862 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); 2863 goto err_xid; 2864 } 2865 2866 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2867 &wsize, credits); 2868 if (rc != 0) 2869 goto err_close; 2870 2871 wdata = cifs_writedata_alloc(cifs_writev_complete); 2872 if (!wdata) { 2873 rc = -ENOMEM; 2874 goto err_uncredit; 2875 } 2876 2877 wdata->sync_mode = wbc->sync_mode; 2878 wdata->offset = folio_pos(folio); 2879 wdata->pid = cfile->pid; 2880 wdata->credits = credits_on_stack; 2881 wdata->cfile = cfile; 2882 wdata->server = server; 2883 cfile = NULL; 2884 2885 /* Find all consecutive lockable dirty pages that have contiguous 2886 * written regions, stopping when we find a page that is not 2887 * immediately lockable, is not dirty or is missing, or we reach the 2888 * end of the range. 2889 */ 2890 if (start < i_size) { 2891 /* Trim the write to the EOF; the extra data is ignored. 
Also 2892 * put an upper limit on the size of a single storedata op. 2893 */ 2894 max_len = wsize; 2895 max_len = min_t(unsigned long long, max_len, end - start + 1); 2896 max_len = min_t(unsigned long long, max_len, i_size - start); 2897 2898 if (len < max_len) { 2899 int max_pages = INT_MAX; 2900 2901 #ifdef CONFIG_CIFS_SMB_DIRECT 2902 if (server->smbd_conn) 2903 max_pages = server->smbd_conn->max_frmr_depth; 2904 #endif 2905 max_pages -= folio_nr_pages(folio); 2906 2907 if (max_pages > 0) 2908 cifs_extend_writeback(mapping, xas, &count, start, 2909 max_pages, max_len, &len); 2910 } 2911 } 2912 len = min_t(unsigned long long, len, i_size - start); 2913 2914 /* We now have a contiguous set of dirty pages, each with writeback 2915 * set; the first page is still locked at this point, but all the rest 2916 * have been unlocked. 2917 */ 2918 folio_unlock(folio); 2919 wdata->bytes = len; 2920 2921 if (start < i_size) { 2922 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, 2923 start, len); 2924 2925 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); 2926 if (rc) 2927 goto err_wdata; 2928 2929 if (wdata->cfile->invalidHandle) 2930 rc = -EAGAIN; 2931 else 2932 rc = wdata->server->ops->async_writev(wdata, 2933 cifs_writedata_release); 2934 if (rc >= 0) { 2935 kref_put(&wdata->refcount, cifs_writedata_release); 2936 goto err_close; 2937 } 2938 } else { 2939 /* The dirty region was entirely beyond the EOF. */ 2940 cifs_pages_written_back(inode, start, len); 2941 rc = 0; 2942 } 2943 2944 err_wdata: 2945 kref_put(&wdata->refcount, cifs_writedata_release); 2946 err_uncredit: 2947 add_credits_and_wake_if(server, credits, 0); 2948 err_close: 2949 if (cfile) 2950 cifsFileInfo_put(cfile); 2951 err_xid: 2952 free_xid(xid); 2953 if (rc == 0) { 2954 wbc->nr_to_write = count; 2955 rc = len; 2956 } else if (is_retryable_error(rc)) { 2957 cifs_pages_write_redirty(inode, start, len); 2958 } else { 2959 cifs_pages_write_failed(inode, start, len); 2960 mapping_set_error(mapping, rc); 2961 } 2962 /* Indication to update ctime and mtime as close is deferred */ 2963 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 2964 return rc; 2965 } 2966 2967 /* 2968 * write a region of pages back to the server 2969 */ 2970 static ssize_t cifs_writepages_begin(struct address_space *mapping, 2971 struct writeback_control *wbc, 2972 struct xa_state *xas, 2973 unsigned long long *_start, 2974 unsigned long long end) 2975 { 2976 struct folio *folio; 2977 unsigned long long start = *_start; 2978 ssize_t ret; 2979 int skips = 0; 2980 2981 search_again: 2982 /* Find the first dirty page. 
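 * Only the RCU read lock is held during the scan, so the folio is re-checked
 * with xas_reload() after a reference has been taken.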
*/ 2983 rcu_read_lock(); 2984 2985 for (;;) { 2986 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 2987 if (xas_retry(xas, folio) || xa_is_value(folio)) 2988 continue; 2989 if (!folio) 2990 break; 2991 2992 if (!folio_try_get_rcu(folio)) { 2993 xas_reset(xas); 2994 continue; 2995 } 2996 2997 if (unlikely(folio != xas_reload(xas))) { 2998 folio_put(folio); 2999 xas_reset(xas); 3000 continue; 3001 } 3002 3003 xas_pause(xas); 3004 break; 3005 } 3006 rcu_read_unlock(); 3007 if (!folio) 3008 return 0; 3009 3010 start = folio_pos(folio); /* May regress with THPs */ 3011 3012 /* At this point we hold neither the i_pages lock nor the page lock: 3013 * the page may be truncated or invalidated (changing page->mapping to 3014 * NULL), or even swizzled back from swapper_space to tmpfs file 3015 * mapping 3016 */ 3017 lock_again: 3018 if (wbc->sync_mode != WB_SYNC_NONE) { 3019 ret = folio_lock_killable(folio); 3020 if (ret < 0) 3021 return ret; 3022 } else { 3023 if (!folio_trylock(folio)) 3024 goto search_again; 3025 } 3026 3027 if (folio->mapping != mapping || 3028 !folio_test_dirty(folio)) { 3029 start += folio_size(folio); 3030 folio_unlock(folio); 3031 goto search_again; 3032 } 3033 3034 if (folio_test_writeback(folio) || 3035 folio_test_fscache(folio)) { 3036 folio_unlock(folio); 3037 if (wbc->sync_mode != WB_SYNC_NONE) { 3038 folio_wait_writeback(folio); 3039 #ifdef CONFIG_CIFS_FSCACHE 3040 folio_wait_fscache(folio); 3041 #endif 3042 goto lock_again; 3043 } 3044 3045 start += folio_size(folio); 3046 if (wbc->sync_mode == WB_SYNC_NONE) { 3047 if (skips >= 5 || need_resched()) { 3048 ret = 0; 3049 goto out; 3050 } 3051 skips++; 3052 } 3053 goto search_again; 3054 } 3055 3056 ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end); 3057 out: 3058 if (ret > 0) 3059 *_start = start + ret; 3060 return ret; 3061 } 3062 3063 /* 3064 * Write a region of pages back to the server 3065 */ 3066 static int cifs_writepages_region(struct address_space *mapping, 3067 struct writeback_control *wbc, 3068 unsigned long long *_start, 3069 unsigned long long end) 3070 { 3071 ssize_t ret; 3072 3073 XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 3074 3075 do { 3076 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end); 3077 if (ret > 0 && wbc->nr_to_write > 0) 3078 cond_resched(); 3079 } while (ret > 0 && wbc->nr_to_write > 0); 3080 3081 return ret > 0 ? 0 : ret; 3082 } 3083 3084 /* 3085 * Write some of the pending data back to the server 3086 */ 3087 static int cifs_writepages(struct address_space *mapping, 3088 struct writeback_control *wbc) 3089 { 3090 loff_t start, end; 3091 int ret; 3092 3093 /* We have to be careful as we can end up racing with setattr() 3094 * truncating the pagecache since the caller doesn't take a lock here 3095 * to prevent it. 
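 * For cyclic writeback we resume from mapping->writeback_index, wrap back to
 * the start of the mapping, and record how far we got for the next pass.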
3096 */ 3097 3098 if (wbc->range_cyclic && mapping->writeback_index) { 3099 start = mapping->writeback_index * PAGE_SIZE; 3100 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3101 if (ret < 0) 3102 goto out; 3103 3104 if (wbc->nr_to_write <= 0) { 3105 mapping->writeback_index = start / PAGE_SIZE; 3106 goto out; 3107 } 3108 3109 start = 0; 3110 end = mapping->writeback_index * PAGE_SIZE; 3111 mapping->writeback_index = 0; 3112 ret = cifs_writepages_region(mapping, wbc, &start, end); 3113 if (ret == 0) 3114 mapping->writeback_index = start / PAGE_SIZE; 3115 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 3116 start = 0; 3117 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3118 if (wbc->nr_to_write > 0 && ret == 0) 3119 mapping->writeback_index = start / PAGE_SIZE; 3120 } else { 3121 start = wbc->range_start; 3122 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end); 3123 } 3124 3125 out: 3126 return ret; 3127 } 3128 3129 static int 3130 cifs_writepage_locked(struct page *page, struct writeback_control *wbc) 3131 { 3132 int rc; 3133 unsigned int xid; 3134 3135 xid = get_xid(); 3136 /* BB add check for wbc flags */ 3137 get_page(page); 3138 if (!PageUptodate(page)) 3139 cifs_dbg(FYI, "ppw - page not up to date\n"); 3140 3141 /* 3142 * Set the "writeback" flag, and clear "dirty" in the radix tree. 3143 * 3144 * A writepage() implementation always needs to do either this, 3145 * or re-dirty the page with "redirty_page_for_writepage()" in 3146 * the case of a failure. 3147 * 3148 * Just unlocking the page will cause the radix tree tag-bits 3149 * to fail to update with the state of the page correctly. 3150 */ 3151 set_page_writeback(page); 3152 retry_write: 3153 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 3154 if (is_retryable_error(rc)) { 3155 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 3156 goto retry_write; 3157 redirty_page_for_writepage(wbc, page); 3158 } else if (rc != 0) { 3159 SetPageError(page); 3160 mapping_set_error(page->mapping, rc); 3161 } else { 3162 SetPageUptodate(page); 3163 } 3164 end_page_writeback(page); 3165 put_page(page); 3166 free_xid(xid); 3167 return rc; 3168 } 3169 3170 static int cifs_write_end(struct file *file, struct address_space *mapping, 3171 loff_t pos, unsigned len, unsigned copied, 3172 struct page *page, void *fsdata) 3173 { 3174 int rc; 3175 struct inode *inode = mapping->host; 3176 struct cifsFileInfo *cfile = file->private_data; 3177 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 3178 struct folio *folio = page_folio(page); 3179 __u32 pid; 3180 3181 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3182 pid = cfile->pid; 3183 else 3184 pid = current->tgid; 3185 3186 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", 3187 page, pos, copied); 3188 3189 if (folio_test_checked(folio)) { 3190 if (copied == len) 3191 folio_mark_uptodate(folio); 3192 folio_clear_checked(folio); 3193 } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) 3194 folio_mark_uptodate(folio); 3195 3196 if (!folio_test_uptodate(folio)) { 3197 char *page_data; 3198 unsigned offset = pos & (PAGE_SIZE - 1); 3199 unsigned int xid; 3200 3201 xid = get_xid(); 3202 /* this is probably better than directly calling 3203 partialpage_write since in this function the file handle is 3204 known which we might as well leverage */ 3205 /* BB check if anything else missing out of ppw 3206 such as updating last write time */ 3207 page_data = kmap(page); 3208 rc = cifs_write(cfile, pid, 
page_data + offset, copied, &pos); 3209 /* if (rc < 0) should we set writebehind rc? */ 3210 kunmap(page); 3211 3212 free_xid(xid); 3213 } else { 3214 rc = copied; 3215 pos += copied; 3216 set_page_dirty(page); 3217 } 3218 3219 if (rc > 0) { 3220 spin_lock(&inode->i_lock); 3221 if (pos > inode->i_size) { 3222 loff_t additional_blocks = (512 - 1 + copied) >> 9; 3223 3224 i_size_write(inode, pos); 3225 /* 3226 * Estimate new allocation size based on the amount written. 3227 * This will be updated from server on close (and on queryinfo) 3228 */ 3229 inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9, 3230 inode->i_blocks + additional_blocks); 3231 } 3232 spin_unlock(&inode->i_lock); 3233 } 3234 3235 unlock_page(page); 3236 put_page(page); 3237 /* Indication to update ctime and mtime as close is deferred */ 3238 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3239 3240 return rc; 3241 } 3242 3243 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, 3244 int datasync) 3245 { 3246 unsigned int xid; 3247 int rc = 0; 3248 struct cifs_tcon *tcon; 3249 struct TCP_Server_Info *server; 3250 struct cifsFileInfo *smbfile = file->private_data; 3251 struct inode *inode = file_inode(file); 3252 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3253 3254 rc = file_write_and_wait_range(file, start, end); 3255 if (rc) { 3256 trace_cifs_fsync_err(inode->i_ino, rc); 3257 return rc; 3258 } 3259 3260 xid = get_xid(); 3261 3262 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3263 file, datasync); 3264 3265 if (!CIFS_CACHE_READ(CIFS_I(inode))) { 3266 rc = cifs_zap_mapping(inode); 3267 if (rc) { 3268 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); 3269 rc = 0; /* don't care about it in fsync */ 3270 } 3271 } 3272 3273 tcon = tlink_tcon(smbfile->tlink); 3274 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3275 server = tcon->ses->server; 3276 if (server->ops->flush == NULL) { 3277 rc = -ENOSYS; 3278 goto strict_fsync_exit; 3279 } 3280 3281 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3282 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3283 if (smbfile) { 3284 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3285 cifsFileInfo_put(smbfile); 3286 } else 3287 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3288 } else 3289 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3290 } 3291 3292 strict_fsync_exit: 3293 free_xid(xid); 3294 return rc; 3295 } 3296 3297 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 3298 { 3299 unsigned int xid; 3300 int rc = 0; 3301 struct cifs_tcon *tcon; 3302 struct TCP_Server_Info *server; 3303 struct cifsFileInfo *smbfile = file->private_data; 3304 struct inode *inode = file_inode(file); 3305 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); 3306 3307 rc = file_write_and_wait_range(file, start, end); 3308 if (rc) { 3309 trace_cifs_fsync_err(file_inode(file)->i_ino, rc); 3310 return rc; 3311 } 3312 3313 xid = get_xid(); 3314 3315 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3316 file, datasync); 3317 3318 tcon = tlink_tcon(smbfile->tlink); 3319 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3320 server = tcon->ses->server; 3321 if (server->ops->flush == NULL) { 3322 rc = -ENOSYS; 3323 goto fsync_exit; 3324 } 3325 3326 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3327 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3328 if (smbfile) { 3329 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3330 cifsFileInfo_put(smbfile); 3331 } else 3332 
cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3333 } else 3334 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3335 } 3336 3337 fsync_exit: 3338 free_xid(xid); 3339 return rc; 3340 } 3341 3342 /* 3343 * As file closes, flush all cached write data for this inode checking 3344 * for write behind errors. 3345 */ 3346 int cifs_flush(struct file *file, fl_owner_t id) 3347 { 3348 struct inode *inode = file_inode(file); 3349 int rc = 0; 3350 3351 if (file->f_mode & FMODE_WRITE) 3352 rc = filemap_write_and_wait(inode->i_mapping); 3353 3354 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); 3355 if (rc) { 3356 /* get more nuanced writeback errors */ 3357 rc = filemap_check_wb_err(file->f_mapping, 0); 3358 trace_cifs_flush_err(inode->i_ino, rc); 3359 } 3360 return rc; 3361 } 3362 3363 static void 3364 cifs_uncached_writedata_release(struct kref *refcount) 3365 { 3366 struct cifs_writedata *wdata = container_of(refcount, 3367 struct cifs_writedata, refcount); 3368 3369 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); 3370 cifs_writedata_release(refcount); 3371 } 3372 3373 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); 3374 3375 static void 3376 cifs_uncached_writev_complete(struct work_struct *work) 3377 { 3378 struct cifs_writedata *wdata = container_of(work, 3379 struct cifs_writedata, work); 3380 struct inode *inode = d_inode(wdata->cfile->dentry); 3381 struct cifsInodeInfo *cifsi = CIFS_I(inode); 3382 3383 spin_lock(&inode->i_lock); 3384 cifs_update_eof(cifsi, wdata->offset, wdata->bytes); 3385 if (cifsi->netfs.remote_i_size > inode->i_size) 3386 i_size_write(inode, cifsi->netfs.remote_i_size); 3387 spin_unlock(&inode->i_lock); 3388 3389 complete(&wdata->done); 3390 collect_uncached_write_data(wdata->ctx); 3391 /* the below call can possibly free the last ref to aio ctx */ 3392 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3393 } 3394 3395 static int 3396 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, 3397 struct cifs_aio_ctx *ctx) 3398 { 3399 unsigned int wsize; 3400 struct cifs_credits credits; 3401 int rc; 3402 struct TCP_Server_Info *server = wdata->server; 3403 3404 do { 3405 if (wdata->cfile->invalidHandle) { 3406 rc = cifs_reopen_file(wdata->cfile, false); 3407 if (rc == -EAGAIN) 3408 continue; 3409 else if (rc) 3410 break; 3411 } 3412 3413 3414 /* 3415 * Wait for credits to resend this wdata. 
3416 * Note: we are attempting to resend the whole wdata not in 3417 * segments 3418 */ 3419 do { 3420 rc = server->ops->wait_mtu_credits(server, wdata->bytes, 3421 &wsize, &credits); 3422 if (rc) 3423 goto fail; 3424 3425 if (wsize < wdata->bytes) { 3426 add_credits_and_wake_if(server, &credits, 0); 3427 msleep(1000); 3428 } 3429 } while (wsize < wdata->bytes); 3430 wdata->credits = credits; 3431 3432 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3433 3434 if (!rc) { 3435 if (wdata->cfile->invalidHandle) 3436 rc = -EAGAIN; 3437 else { 3438 wdata->replay = true; 3439 #ifdef CONFIG_CIFS_SMB_DIRECT 3440 if (wdata->mr) { 3441 wdata->mr->need_invalidate = true; 3442 smbd_deregister_mr(wdata->mr); 3443 wdata->mr = NULL; 3444 } 3445 #endif 3446 rc = server->ops->async_writev(wdata, 3447 cifs_uncached_writedata_release); 3448 } 3449 } 3450 3451 /* If the write was successfully sent, we are done */ 3452 if (!rc) { 3453 list_add_tail(&wdata->list, wdata_list); 3454 return 0; 3455 } 3456 3457 /* Roll back credits and retry if needed */ 3458 add_credits_and_wake_if(server, &wdata->credits, 0); 3459 } while (rc == -EAGAIN); 3460 3461 fail: 3462 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3463 return rc; 3464 } 3465 3466 /* 3467 * Select span of a bvec iterator we're going to use. Limit it by both maximum 3468 * size and maximum number of segments. 3469 */ 3470 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 3471 size_t max_segs, unsigned int *_nsegs) 3472 { 3473 const struct bio_vec *bvecs = iter->bvec; 3474 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 3475 size_t len, span = 0, n = iter->count; 3476 size_t skip = iter->iov_offset; 3477 3478 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 3479 return 0; 3480 3481 while (n && ix < nbv && skip) { 3482 len = bvecs[ix].bv_len; 3483 if (skip < len) 3484 break; 3485 skip -= len; 3486 n -= len; 3487 ix++; 3488 } 3489 3490 while (n && ix < nbv) { 3491 len = min3(n, bvecs[ix].bv_len - skip, max_size); 3492 span += len; 3493 max_size -= len; 3494 nsegs++; 3495 ix++; 3496 if (max_size == 0 || nsegs >= max_segs) 3497 break; 3498 skip = 0; 3499 n -= len; 3500 } 3501 3502 *_nsegs = nsegs; 3503 return span; 3504 } 3505 3506 static int 3507 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 3508 struct cifsFileInfo *open_file, 3509 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 3510 struct cifs_aio_ctx *ctx) 3511 { 3512 int rc = 0; 3513 size_t cur_len, max_len; 3514 struct cifs_writedata *wdata; 3515 pid_t pid; 3516 struct TCP_Server_Info *server; 3517 unsigned int xid, max_segs = INT_MAX; 3518 3519 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3520 pid = open_file->pid; 3521 else 3522 pid = current->tgid; 3523 3524 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3525 xid = get_xid(); 3526 3527 #ifdef CONFIG_CIFS_SMB_DIRECT 3528 if (server->smbd_conn) 3529 max_segs = server->smbd_conn->max_frmr_depth; 3530 #endif 3531 3532 do { 3533 struct cifs_credits credits_on_stack; 3534 struct cifs_credits *credits = &credits_on_stack; 3535 unsigned int wsize, nsegs = 0; 3536 3537 if (signal_pending(current)) { 3538 rc = -EINTR; 3539 break; 3540 } 3541 3542 if (open_file->invalidHandle) { 3543 rc = cifs_reopen_file(open_file, false); 3544 if (rc == -EAGAIN) 3545 continue; 3546 else if (rc) 3547 break; 3548 } 3549 3550 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 3551 &wsize, credits); 3552 if (rc) 3553 break; 3554 3555 max_len = 
min_t(const size_t, len, wsize); 3556 if (!max_len) { 3557 rc = -EAGAIN; 3558 add_credits_and_wake_if(server, credits, 0); 3559 break; 3560 } 3561 3562 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); 3563 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3564 cur_len, max_len, nsegs, from->nr_segs, max_segs); 3565 if (cur_len == 0) { 3566 rc = -EIO; 3567 add_credits_and_wake_if(server, credits, 0); 3568 break; 3569 } 3570 3571 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); 3572 if (!wdata) { 3573 rc = -ENOMEM; 3574 add_credits_and_wake_if(server, credits, 0); 3575 break; 3576 } 3577 3578 wdata->sync_mode = WB_SYNC_ALL; 3579 wdata->offset = (__u64)fpos; 3580 wdata->cfile = cifsFileInfo_get(open_file); 3581 wdata->server = server; 3582 wdata->pid = pid; 3583 wdata->bytes = cur_len; 3584 wdata->credits = credits_on_stack; 3585 wdata->iter = *from; 3586 wdata->ctx = ctx; 3587 kref_get(&ctx->refcount); 3588 3589 iov_iter_truncate(&wdata->iter, cur_len); 3590 3591 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3592 3593 if (!rc) { 3594 if (wdata->cfile->invalidHandle) 3595 rc = -EAGAIN; 3596 else 3597 rc = server->ops->async_writev(wdata, 3598 cifs_uncached_writedata_release); 3599 } 3600 3601 if (rc) { 3602 add_credits_and_wake_if(server, &wdata->credits, 0); 3603 kref_put(&wdata->refcount, 3604 cifs_uncached_writedata_release); 3605 if (rc == -EAGAIN) 3606 continue; 3607 break; 3608 } 3609 3610 list_add_tail(&wdata->list, wdata_list); 3611 iov_iter_advance(from, cur_len); 3612 fpos += cur_len; 3613 len -= cur_len; 3614 } while (len > 0); 3615 3616 free_xid(xid); 3617 return rc; 3618 } 3619 3620 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) 3621 { 3622 struct cifs_writedata *wdata, *tmp; 3623 struct cifs_tcon *tcon; 3624 struct cifs_sb_info *cifs_sb; 3625 struct dentry *dentry = ctx->cfile->dentry; 3626 ssize_t rc; 3627 3628 tcon = tlink_tcon(ctx->cfile->tlink); 3629 cifs_sb = CIFS_SB(dentry->d_sb); 3630 3631 mutex_lock(&ctx->aio_mutex); 3632 3633 if (list_empty(&ctx->list)) { 3634 mutex_unlock(&ctx->aio_mutex); 3635 return; 3636 } 3637 3638 rc = ctx->rc; 3639 /* 3640 * Wait for and collect replies for any successful sends in order of 3641 * increasing offset. Once an error is hit, then return without waiting 3642 * for any more replies. 
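 * A wdata that completed with -EAGAIN is retried: direct I/O resends the
 * original wdata, buffered I/O rebuilds the request from ctx->iter at the
 * failed offset.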
3643 */ 3644 restart_loop: 3645 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { 3646 if (!rc) { 3647 if (!try_wait_for_completion(&wdata->done)) { 3648 mutex_unlock(&ctx->aio_mutex); 3649 return; 3650 } 3651 3652 if (wdata->result) 3653 rc = wdata->result; 3654 else 3655 ctx->total_len += wdata->bytes; 3656 3657 /* resend call if it's a retryable error */ 3658 if (rc == -EAGAIN) { 3659 struct list_head tmp_list; 3660 struct iov_iter tmp_from = ctx->iter; 3661 3662 INIT_LIST_HEAD(&tmp_list); 3663 list_del_init(&wdata->list); 3664 3665 if (ctx->direct_io) 3666 rc = cifs_resend_wdata( 3667 wdata, &tmp_list, ctx); 3668 else { 3669 iov_iter_advance(&tmp_from, 3670 wdata->offset - ctx->pos); 3671 3672 rc = cifs_write_from_iter(wdata->offset, 3673 wdata->bytes, &tmp_from, 3674 ctx->cfile, cifs_sb, &tmp_list, 3675 ctx); 3676 3677 kref_put(&wdata->refcount, 3678 cifs_uncached_writedata_release); 3679 } 3680 3681 list_splice(&tmp_list, &ctx->list); 3682 goto restart_loop; 3683 } 3684 } 3685 list_del_init(&wdata->list); 3686 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3687 } 3688 3689 cifs_stats_bytes_written(tcon, ctx->total_len); 3690 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 3691 3692 ctx->rc = (rc == 0) ? ctx->total_len : rc; 3693 3694 mutex_unlock(&ctx->aio_mutex); 3695 3696 if (ctx->iocb && ctx->iocb->ki_complete) 3697 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3698 else 3699 complete(&ctx->done); 3700 } 3701 3702 static ssize_t __cifs_writev( 3703 struct kiocb *iocb, struct iov_iter *from, bool direct) 3704 { 3705 struct file *file = iocb->ki_filp; 3706 ssize_t total_written = 0; 3707 struct cifsFileInfo *cfile; 3708 struct cifs_tcon *tcon; 3709 struct cifs_sb_info *cifs_sb; 3710 struct cifs_aio_ctx *ctx; 3711 int rc; 3712 3713 rc = generic_write_checks(iocb, from); 3714 if (rc <= 0) 3715 return rc; 3716 3717 cifs_sb = CIFS_FILE_SB(file); 3718 cfile = file->private_data; 3719 tcon = tlink_tcon(cfile->tlink); 3720 3721 if (!tcon->ses->server->ops->async_writev) 3722 return -ENOSYS; 3723 3724 ctx = cifs_aio_ctx_alloc(); 3725 if (!ctx) 3726 return -ENOMEM; 3727 3728 ctx->cfile = cifsFileInfo_get(cfile); 3729 3730 if (!is_sync_kiocb(iocb)) 3731 ctx->iocb = iocb; 3732 3733 ctx->pos = iocb->ki_pos; 3734 ctx->direct_io = direct; 3735 ctx->nr_pinned_pages = 0; 3736 3737 if (user_backed_iter(from)) { 3738 /* 3739 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3740 * they contain references to the calling process's virtual 3741 * memory layout which won't be available in an async worker 3742 * thread. This also takes a pin on every folio involved. 3743 */ 3744 rc = netfs_extract_user_iter(from, iov_iter_count(from), 3745 &ctx->iter, 0); 3746 if (rc < 0) { 3747 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3748 return rc; 3749 } 3750 3751 ctx->nr_pinned_pages = rc; 3752 ctx->bv = (void *)ctx->iter.bvec; 3753 ctx->bv_need_unpin = iov_iter_extract_will_pin(from); 3754 } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && 3755 !is_sync_kiocb(iocb)) { 3756 /* 3757 * If the op is asynchronous, we need to copy the list attached 3758 * to a BVEC/KVEC-type iterator, but we assume that the storage 3759 * will be pinned by the caller; in any case, we may or may not 3760 * be able to pin the pages, so we don't try. 
3761 */ 3762 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); 3763 if (!ctx->bv) { 3764 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3765 return -ENOMEM; 3766 } 3767 } else { 3768 /* 3769 * Otherwise, we just pass the iterator down as-is and rely on 3770 * the caller to make sure the pages referred to by the 3771 * iterator don't evaporate. 3772 */ 3773 ctx->iter = *from; 3774 } 3775 3776 ctx->len = iov_iter_count(&ctx->iter); 3777 3778 /* grab a lock here due to read response handlers can access ctx */ 3779 mutex_lock(&ctx->aio_mutex); 3780 3781 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, 3782 cfile, cifs_sb, &ctx->list, ctx); 3783 3784 /* 3785 * If at least one write was successfully sent, then discard any rc 3786 * value from the later writes. If the other write succeeds, then 3787 * we'll end up returning whatever was written. If it fails, then 3788 * we'll get a new rc value from that. 3789 */ 3790 if (!list_empty(&ctx->list)) 3791 rc = 0; 3792 3793 mutex_unlock(&ctx->aio_mutex); 3794 3795 if (rc) { 3796 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3797 return rc; 3798 } 3799 3800 if (!is_sync_kiocb(iocb)) { 3801 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3802 return -EIOCBQUEUED; 3803 } 3804 3805 rc = wait_for_completion_killable(&ctx->done); 3806 if (rc) { 3807 mutex_lock(&ctx->aio_mutex); 3808 ctx->rc = rc = -EINTR; 3809 total_written = ctx->total_len; 3810 mutex_unlock(&ctx->aio_mutex); 3811 } else { 3812 rc = ctx->rc; 3813 total_written = ctx->total_len; 3814 } 3815 3816 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3817 3818 if (unlikely(!total_written)) 3819 return rc; 3820 3821 iocb->ki_pos += total_written; 3822 return total_written; 3823 } 3824 3825 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) 3826 { 3827 struct file *file = iocb->ki_filp; 3828 3829 cifs_revalidate_mapping(file->f_inode); 3830 return __cifs_writev(iocb, from, true); 3831 } 3832 3833 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 3834 { 3835 return __cifs_writev(iocb, from, false); 3836 } 3837 3838 static ssize_t 3839 cifs_writev(struct kiocb *iocb, struct iov_iter *from) 3840 { 3841 struct file *file = iocb->ki_filp; 3842 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 3843 struct inode *inode = file->f_mapping->host; 3844 struct cifsInodeInfo *cinode = CIFS_I(inode); 3845 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 3846 ssize_t rc; 3847 3848 inode_lock(inode); 3849 /* 3850 * We need to hold the sem to be sure nobody modifies lock list 3851 * with a brlock that prevents writing. 
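 * lock_sem is taken shared below; cifs_find_lock_conflict() then checks the
 * cached brlock list for an exclusive lock overlapping the write range before
 * the generic write path is used.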
3852 */ 3853 down_read(&cinode->lock_sem); 3854 3855 rc = generic_write_checks(iocb, from); 3856 if (rc <= 0) 3857 goto out; 3858 3859 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), 3860 server->vals->exclusive_lock_type, 0, 3861 NULL, CIFS_WRITE_OP)) 3862 rc = __generic_file_write_iter(iocb, from); 3863 else 3864 rc = -EACCES; 3865 out: 3866 up_read(&cinode->lock_sem); 3867 inode_unlock(inode); 3868 3869 if (rc > 0) 3870 rc = generic_write_sync(iocb, rc); 3871 return rc; 3872 } 3873 3874 ssize_t 3875 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) 3876 { 3877 struct inode *inode = file_inode(iocb->ki_filp); 3878 struct cifsInodeInfo *cinode = CIFS_I(inode); 3879 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3880 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 3881 iocb->ki_filp->private_data; 3882 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3883 ssize_t written; 3884 3885 written = cifs_get_writer(cinode); 3886 if (written) 3887 return written; 3888 3889 if (CIFS_CACHE_WRITE(cinode)) { 3890 if (cap_unix(tcon->ses) && 3891 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 3892 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 3893 written = generic_file_write_iter(iocb, from); 3894 goto out; 3895 } 3896 written = cifs_writev(iocb, from); 3897 goto out; 3898 } 3899 /* 3900 * For non-oplocked files in strict cache mode we need to write the data 3901 * to the server exactly from pos to pos+len-1 rather than flush all 3902 * affected pages because it may cause an error with mandatory locks on 3903 * these pages but not on the region from pos to pos+len-1. 3904 */ 3905 written = cifs_user_writev(iocb, from); 3906 if (CIFS_CACHE_READ(cinode)) { 3907 /* 3908 * We have read level caching and we have just sent a write 3909 * request to the server thus making data in the cache stale. 3910 * Zap the cache and set oplock/lease level to NONE to avoid 3911 * reading stale data from the cache. All subsequent read 3912 * operations will read new data from the server.
3913 */ 3914 cifs_zap_mapping(inode); 3915 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", 3916 inode); 3917 cinode->oplock = 0; 3918 } 3919 out: 3920 cifs_put_writer(cinode); 3921 return written; 3922 } 3923 3924 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) 3925 { 3926 struct cifs_readdata *rdata; 3927 3928 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); 3929 if (rdata) { 3930 kref_init(&rdata->refcount); 3931 INIT_LIST_HEAD(&rdata->list); 3932 init_completion(&rdata->done); 3933 INIT_WORK(&rdata->work, complete); 3934 } 3935 3936 return rdata; 3937 } 3938 3939 void 3940 cifs_readdata_release(struct kref *refcount) 3941 { 3942 struct cifs_readdata *rdata = container_of(refcount, 3943 struct cifs_readdata, refcount); 3944 3945 if (rdata->ctx) 3946 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); 3947 #ifdef CONFIG_CIFS_SMB_DIRECT 3948 if (rdata->mr) { 3949 smbd_deregister_mr(rdata->mr); 3950 rdata->mr = NULL; 3951 } 3952 #endif 3953 if (rdata->cfile) 3954 cifsFileInfo_put(rdata->cfile); 3955 3956 kfree(rdata); 3957 } 3958 3959 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); 3960 3961 static void 3962 cifs_uncached_readv_complete(struct work_struct *work) 3963 { 3964 struct cifs_readdata *rdata = container_of(work, 3965 struct cifs_readdata, work); 3966 3967 complete(&rdata->done); 3968 collect_uncached_read_data(rdata->ctx); 3969 /* the below call can possibly free the last ref to aio ctx */ 3970 kref_put(&rdata->refcount, cifs_readdata_release); 3971 } 3972 3973 static int cifs_resend_rdata(struct cifs_readdata *rdata, 3974 struct list_head *rdata_list, 3975 struct cifs_aio_ctx *ctx) 3976 { 3977 unsigned int rsize; 3978 struct cifs_credits credits; 3979 int rc; 3980 struct TCP_Server_Info *server; 3981 3982 /* XXX: should we pick a new channel here? */ 3983 server = rdata->server; 3984 3985 do { 3986 if (rdata->cfile->invalidHandle) { 3987 rc = cifs_reopen_file(rdata->cfile, true); 3988 if (rc == -EAGAIN) 3989 continue; 3990 else if (rc) 3991 break; 3992 } 3993 3994 /* 3995 * Wait for credits to resend this rdata. 
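 * (Credits are the SMB2 flow-control window granted by the server; if the server grants less than rdata->bytes, the loop below returns the credits and retries after a short sleep.)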
3996 * Note: we are attempting to resend the whole rdata not in 3997 * segments 3998 */ 3999 do { 4000 rc = server->ops->wait_mtu_credits(server, rdata->bytes, 4001 &rsize, &credits); 4002 4003 if (rc) 4004 goto fail; 4005 4006 if (rsize < rdata->bytes) { 4007 add_credits_and_wake_if(server, &credits, 0); 4008 msleep(1000); 4009 } 4010 } while (rsize < rdata->bytes); 4011 rdata->credits = credits; 4012 4013 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4014 if (!rc) { 4015 if (rdata->cfile->invalidHandle) 4016 rc = -EAGAIN; 4017 else { 4018 #ifdef CONFIG_CIFS_SMB_DIRECT 4019 if (rdata->mr) { 4020 rdata->mr->need_invalidate = true; 4021 smbd_deregister_mr(rdata->mr); 4022 rdata->mr = NULL; 4023 } 4024 #endif 4025 rc = server->ops->async_readv(rdata); 4026 } 4027 } 4028 4029 /* If the read was successfully sent, we are done */ 4030 if (!rc) { 4031 /* Add to aio pending list */ 4032 list_add_tail(&rdata->list, rdata_list); 4033 return 0; 4034 } 4035 4036 /* Roll back credits and retry if needed */ 4037 add_credits_and_wake_if(server, &rdata->credits, 0); 4038 } while (rc == -EAGAIN); 4039 4040 fail: 4041 kref_put(&rdata->refcount, cifs_readdata_release); 4042 return rc; 4043 } 4044 4045 static int 4046 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file, 4047 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, 4048 struct cifs_aio_ctx *ctx) 4049 { 4050 struct cifs_readdata *rdata; 4051 unsigned int rsize, nsegs, max_segs = INT_MAX; 4052 struct cifs_credits credits_on_stack; 4053 struct cifs_credits *credits = &credits_on_stack; 4054 size_t cur_len, max_len; 4055 int rc; 4056 pid_t pid; 4057 struct TCP_Server_Info *server; 4058 4059 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4060 4061 #ifdef CONFIG_CIFS_SMB_DIRECT 4062 if (server->smbd_conn) 4063 max_segs = server->smbd_conn->max_frmr_depth; 4064 #endif 4065 4066 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4067 pid = open_file->pid; 4068 else 4069 pid = current->tgid; 4070 4071 do { 4072 if (open_file->invalidHandle) { 4073 rc = cifs_reopen_file(open_file, true); 4074 if (rc == -EAGAIN) 4075 continue; 4076 else if (rc) 4077 break; 4078 } 4079 4080 if (cifs_sb->ctx->rsize == 0) 4081 cifs_sb->ctx->rsize = 4082 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4083 cifs_sb->ctx); 4084 4085 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4086 &rsize, credits); 4087 if (rc) 4088 break; 4089 4090 max_len = min_t(size_t, len, rsize); 4091 4092 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, 4093 max_segs, &nsegs); 4094 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", 4095 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); 4096 if (cur_len == 0) { 4097 rc = -EIO; 4098 add_credits_and_wake_if(server, credits, 0); 4099 break; 4100 } 4101 4102 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); 4103 if (!rdata) { 4104 add_credits_and_wake_if(server, credits, 0); 4105 rc = -ENOMEM; 4106 break; 4107 } 4108 4109 rdata->server = server; 4110 rdata->cfile = cifsFileInfo_get(open_file); 4111 rdata->offset = fpos; 4112 rdata->bytes = cur_len; 4113 rdata->pid = pid; 4114 rdata->credits = credits_on_stack; 4115 rdata->ctx = ctx; 4116 kref_get(&ctx->refcount); 4117 4118 rdata->iter = ctx->iter; 4119 iov_iter_truncate(&rdata->iter, cur_len); 4120 4121 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4122 4123 if (!rc) { 4124 if (rdata->cfile->invalidHandle) 4125 rc = -EAGAIN; 4126 else 4127 rc = 
server->ops->async_readv(rdata); 4128 } 4129 4130 if (rc) { 4131 add_credits_and_wake_if(server, &rdata->credits, 0); 4132 kref_put(&rdata->refcount, cifs_readdata_release); 4133 if (rc == -EAGAIN) 4134 continue; 4135 break; 4136 } 4137 4138 list_add_tail(&rdata->list, rdata_list); 4139 iov_iter_advance(&ctx->iter, cur_len); 4140 fpos += cur_len; 4141 len -= cur_len; 4142 } while (len > 0); 4143 4144 return rc; 4145 } 4146 4147 static void 4148 collect_uncached_read_data(struct cifs_aio_ctx *ctx) 4149 { 4150 struct cifs_readdata *rdata, *tmp; 4151 struct cifs_sb_info *cifs_sb; 4152 int rc; 4153 4154 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); 4155 4156 mutex_lock(&ctx->aio_mutex); 4157 4158 if (list_empty(&ctx->list)) { 4159 mutex_unlock(&ctx->aio_mutex); 4160 return; 4161 } 4162 4163 rc = ctx->rc; 4164 /* the loop below should proceed in the order of increasing offsets */ 4165 again: 4166 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { 4167 if (!rc) { 4168 if (!try_wait_for_completion(&rdata->done)) { 4169 mutex_unlock(&ctx->aio_mutex); 4170 return; 4171 } 4172 4173 if (rdata->result == -EAGAIN) { 4174 /* resend call if it's a retryable error */ 4175 struct list_head tmp_list; 4176 unsigned int got_bytes = rdata->got_bytes; 4177 4178 list_del_init(&rdata->list); 4179 INIT_LIST_HEAD(&tmp_list); 4180 4181 if (ctx->direct_io) { 4182 /* 4183 * Re-use rdata as this is a 4184 * direct I/O 4185 */ 4186 rc = cifs_resend_rdata( 4187 rdata, 4188 &tmp_list, ctx); 4189 } else { 4190 rc = cifs_send_async_read( 4191 rdata->offset + got_bytes, 4192 rdata->bytes - got_bytes, 4193 rdata->cfile, cifs_sb, 4194 &tmp_list, ctx); 4195 4196 kref_put(&rdata->refcount, 4197 cifs_readdata_release); 4198 } 4199 4200 list_splice(&tmp_list, &ctx->list); 4201 4202 goto again; 4203 } else if (rdata->result) 4204 rc = rdata->result; 4205 4206 /* if there was a short read -- discard anything left */ 4207 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) 4208 rc = -ENODATA; 4209 4210 ctx->total_len += rdata->got_bytes; 4211 } 4212 list_del_init(&rdata->list); 4213 kref_put(&rdata->refcount, cifs_readdata_release); 4214 } 4215 4216 /* mask nodata case */ 4217 if (rc == -ENODATA) 4218 rc = 0; 4219 4220 ctx->rc = (rc == 0) ? 
(ssize_t)ctx->total_len : rc; 4221 4222 mutex_unlock(&ctx->aio_mutex); 4223 4224 if (ctx->iocb && ctx->iocb->ki_complete) 4225 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 4226 else 4227 complete(&ctx->done); 4228 } 4229 4230 static ssize_t __cifs_readv( 4231 struct kiocb *iocb, struct iov_iter *to, bool direct) 4232 { 4233 size_t len; 4234 struct file *file = iocb->ki_filp; 4235 struct cifs_sb_info *cifs_sb; 4236 struct cifsFileInfo *cfile; 4237 struct cifs_tcon *tcon; 4238 ssize_t rc, total_read = 0; 4239 loff_t offset = iocb->ki_pos; 4240 struct cifs_aio_ctx *ctx; 4241 4242 len = iov_iter_count(to); 4243 if (!len) 4244 return 0; 4245 4246 cifs_sb = CIFS_FILE_SB(file); 4247 cfile = file->private_data; 4248 tcon = tlink_tcon(cfile->tlink); 4249 4250 if (!tcon->ses->server->ops->async_readv) 4251 return -ENOSYS; 4252 4253 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4254 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4255 4256 ctx = cifs_aio_ctx_alloc(); 4257 if (!ctx) 4258 return -ENOMEM; 4259 4260 ctx->pos = offset; 4261 ctx->direct_io = direct; 4262 ctx->len = len; 4263 ctx->cfile = cifsFileInfo_get(cfile); 4264 ctx->nr_pinned_pages = 0; 4265 4266 if (!is_sync_kiocb(iocb)) 4267 ctx->iocb = iocb; 4268 4269 if (user_backed_iter(to)) { 4270 /* 4271 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 4272 * they contain references to the calling process's virtual 4273 * memory layout which won't be available in an async worker 4274 * thread. This also takes a pin on every folio involved. 4275 */ 4276 rc = netfs_extract_user_iter(to, iov_iter_count(to), 4277 &ctx->iter, 0); 4278 if (rc < 0) { 4279 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4280 return rc; 4281 } 4282 4283 ctx->nr_pinned_pages = rc; 4284 ctx->bv = (void *)ctx->iter.bvec; 4285 ctx->bv_need_unpin = iov_iter_extract_will_pin(to); 4286 ctx->should_dirty = true; 4287 } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && 4288 !is_sync_kiocb(iocb)) { 4289 /* 4290 * If the op is asynchronous, we need to copy the list attached 4291 * to a BVEC/KVEC-type iterator, but we assume that the storage 4292 * will be retained by the caller; in any case, we may or may 4293 * not be able to pin the pages, so we don't try. 4294 */ 4295 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); 4296 if (!ctx->bv) { 4297 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4298 return -ENOMEM; 4299 } 4300 } else { 4301 /* 4302 * Otherwise, we just pass the iterator down as-is and rely on 4303 * the caller to make sure the pages referred to by the 4304 * iterator don't evaporate. 
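 * (For a synchronous kiocb this is safe because we block in wait_for_completion_killable() further down until all sub-reads have completed.)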
4305 */ 4306 ctx->iter = *to; 4307 } 4308 4309 if (direct) { 4310 rc = filemap_write_and_wait_range(file->f_inode->i_mapping, 4311 offset, offset + len - 1); 4312 if (rc) { 4313 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4314 return -EAGAIN; 4315 } 4316 } 4317 4318 /* grab a lock here due to read response handlers can access ctx */ 4319 mutex_lock(&ctx->aio_mutex); 4320 4321 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx); 4322 4323 /* if at least one read request send succeeded, then reset rc */ 4324 if (!list_empty(&ctx->list)) 4325 rc = 0; 4326 4327 mutex_unlock(&ctx->aio_mutex); 4328 4329 if (rc) { 4330 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4331 return rc; 4332 } 4333 4334 if (!is_sync_kiocb(iocb)) { 4335 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4336 return -EIOCBQUEUED; 4337 } 4338 4339 rc = wait_for_completion_killable(&ctx->done); 4340 if (rc) { 4341 mutex_lock(&ctx->aio_mutex); 4342 ctx->rc = rc = -EINTR; 4343 total_read = ctx->total_len; 4344 mutex_unlock(&ctx->aio_mutex); 4345 } else { 4346 rc = ctx->rc; 4347 total_read = ctx->total_len; 4348 } 4349 4350 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4351 4352 if (total_read) { 4353 iocb->ki_pos += total_read; 4354 return total_read; 4355 } 4356 return rc; 4357 } 4358 4359 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) 4360 { 4361 return __cifs_readv(iocb, to, true); 4362 } 4363 4364 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) 4365 { 4366 return __cifs_readv(iocb, to, false); 4367 } 4368 4369 ssize_t 4370 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) 4371 { 4372 struct inode *inode = file_inode(iocb->ki_filp); 4373 struct cifsInodeInfo *cinode = CIFS_I(inode); 4374 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 4375 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 4376 iocb->ki_filp->private_data; 4377 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 4378 int rc = -EACCES; 4379 4380 /* 4381 * In strict cache mode we need to read from the server all the time 4382 * if we don't have level II oplock because the server can delay mtime 4383 * change - so we can't make a decision about inode invalidating. 4384 * And we can also fail with pagereading if there are mandatory locks 4385 * on pages affected by this read but not on the region from pos to 4386 * pos+len-1. 4387 */ 4388 if (!CIFS_CACHE_READ(cinode)) 4389 return cifs_user_readv(iocb, to); 4390 4391 if (cap_unix(tcon->ses) && 4392 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 4393 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 4394 return generic_file_read_iter(iocb, to); 4395 4396 /* 4397 * We need to hold the sem to be sure nobody modifies lock list 4398 * with a brlock that prevents reading. 
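 * (As with the write path above, a conflicting mandatory lock causes the cached read to be refused with -EACCES below.)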
4399 */ 4400 down_read(&cinode->lock_sem); 4401 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), 4402 tcon->ses->server->vals->shared_lock_type, 4403 0, NULL, CIFS_READ_OP)) 4404 rc = generic_file_read_iter(iocb, to); 4405 up_read(&cinode->lock_sem); 4406 return rc; 4407 } 4408 4409 static ssize_t 4410 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) 4411 { 4412 int rc = -EACCES; 4413 unsigned int bytes_read = 0; 4414 unsigned int total_read; 4415 unsigned int current_read_size; 4416 unsigned int rsize; 4417 struct cifs_sb_info *cifs_sb; 4418 struct cifs_tcon *tcon; 4419 struct TCP_Server_Info *server; 4420 unsigned int xid; 4421 char *cur_offset; 4422 struct cifsFileInfo *open_file; 4423 struct cifs_io_parms io_parms = {0}; 4424 int buf_type = CIFS_NO_BUFFER; 4425 __u32 pid; 4426 4427 xid = get_xid(); 4428 cifs_sb = CIFS_FILE_SB(file); 4429 4430 /* FIXME: set up handlers for larger reads and/or convert to async */ 4431 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); 4432 4433 if (file->private_data == NULL) { 4434 rc = -EBADF; 4435 free_xid(xid); 4436 return rc; 4437 } 4438 open_file = file->private_data; 4439 tcon = tlink_tcon(open_file->tlink); 4440 server = cifs_pick_channel(tcon->ses); 4441 4442 if (!server->ops->sync_read) { 4443 free_xid(xid); 4444 return -ENOSYS; 4445 } 4446 4447 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4448 pid = open_file->pid; 4449 else 4450 pid = current->tgid; 4451 4452 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4453 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4454 4455 for (total_read = 0, cur_offset = read_data; read_size > total_read; 4456 total_read += bytes_read, cur_offset += bytes_read) { 4457 do { 4458 current_read_size = min_t(uint, read_size - total_read, 4459 rsize); 4460 /* 4461 * For windows me and 9x we do not want to request more 4462 * than it negotiated since it will refuse the read 4463 * then. 4464 */ 4465 if (!(tcon->ses->capabilities & 4466 tcon->ses->server->vals->cap_large_files)) { 4467 current_read_size = min_t(uint, 4468 current_read_size, CIFSMaxBufSize); 4469 } 4470 if (open_file->invalidHandle) { 4471 rc = cifs_reopen_file(open_file, true); 4472 if (rc != 0) 4473 break; 4474 } 4475 io_parms.pid = pid; 4476 io_parms.tcon = tcon; 4477 io_parms.offset = *offset; 4478 io_parms.length = current_read_size; 4479 io_parms.server = server; 4480 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 4481 &bytes_read, &cur_offset, 4482 &buf_type); 4483 } while (rc == -EAGAIN); 4484 4485 if (rc || (bytes_read == 0)) { 4486 if (total_read) { 4487 break; 4488 } else { 4489 free_xid(xid); 4490 return rc; 4491 } 4492 } else { 4493 cifs_stats_bytes_read(tcon, total_read); 4494 *offset += bytes_read; 4495 } 4496 } 4497 free_xid(xid); 4498 return total_read; 4499 } 4500 4501 /* 4502 * If the page is mmap'ed into a process' page tables, then we need to make 4503 * sure that it doesn't change while being written back. 4504 */ 4505 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 4506 { 4507 struct folio *folio = page_folio(vmf->page); 4508 4509 /* Wait for the folio to be written to the cache before we allow it to 4510 * be modified. We then assume the entire folio will need writing back. 
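 * (The fscache wait and the folio lock below are killable; if interrupted by a fatal signal we return VM_FAULT_RETRY rather than blocking the faulting task uninterruptibly.)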
4511 */ 4512 #ifdef CONFIG_CIFS_FSCACHE 4513 if (folio_test_fscache(folio) && 4514 folio_wait_fscache_killable(folio) < 0) 4515 return VM_FAULT_RETRY; 4516 #endif 4517 4518 folio_wait_writeback(folio); 4519 4520 if (folio_lock_killable(folio) < 0) 4521 return VM_FAULT_RETRY; 4522 return VM_FAULT_LOCKED; 4523 } 4524 4525 static const struct vm_operations_struct cifs_file_vm_ops = { 4526 .fault = filemap_fault, 4527 .map_pages = filemap_map_pages, 4528 .page_mkwrite = cifs_page_mkwrite, 4529 }; 4530 4531 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 4532 { 4533 int xid, rc = 0; 4534 struct inode *inode = file_inode(file); 4535 4536 xid = get_xid(); 4537 4538 if (!CIFS_CACHE_READ(CIFS_I(inode))) 4539 rc = cifs_zap_mapping(inode); 4540 if (!rc) 4541 rc = generic_file_mmap(file, vma); 4542 if (!rc) 4543 vma->vm_ops = &cifs_file_vm_ops; 4544 4545 free_xid(xid); 4546 return rc; 4547 } 4548 4549 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) 4550 { 4551 int rc, xid; 4552 4553 xid = get_xid(); 4554 4555 rc = cifs_revalidate_file(file); 4556 if (rc) 4557 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", 4558 rc); 4559 if (!rc) 4560 rc = generic_file_mmap(file, vma); 4561 if (!rc) 4562 vma->vm_ops = &cifs_file_vm_ops; 4563 4564 free_xid(xid); 4565 return rc; 4566 } 4567 4568 /* 4569 * Unlock a bunch of folios in the pagecache. 4570 */ 4571 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last) 4572 { 4573 struct folio *folio; 4574 XA_STATE(xas, &mapping->i_pages, first); 4575 4576 rcu_read_lock(); 4577 xas_for_each(&xas, folio, last) { 4578 folio_unlock(folio); 4579 } 4580 rcu_read_unlock(); 4581 } 4582 4583 static void cifs_readahead_complete(struct work_struct *work) 4584 { 4585 struct cifs_readdata *rdata = container_of(work, 4586 struct cifs_readdata, work); 4587 struct folio *folio; 4588 pgoff_t last; 4589 bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes); 4590 4591 XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE); 4592 4593 if (good) 4594 cifs_readahead_to_fscache(rdata->mapping->host, 4595 rdata->offset, rdata->bytes); 4596 4597 if (iov_iter_count(&rdata->iter) > 0) 4598 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter); 4599 4600 last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE; 4601 4602 rcu_read_lock(); 4603 xas_for_each(&xas, folio, last) { 4604 if (good) { 4605 flush_dcache_folio(folio); 4606 folio_mark_uptodate(folio); 4607 } 4608 folio_unlock(folio); 4609 } 4610 rcu_read_unlock(); 4611 4612 kref_put(&rdata->refcount, cifs_readdata_release); 4613 } 4614 4615 static void cifs_readahead(struct readahead_control *ractl) 4616 { 4617 struct cifsFileInfo *open_file = ractl->file->private_data; 4618 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file); 4619 struct TCP_Server_Info *server; 4620 unsigned int xid, nr_pages, cache_nr_pages = 0; 4621 unsigned int ra_pages; 4622 pgoff_t next_cached = ULONG_MAX, ra_index; 4623 bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) && 4624 cifs_inode_cookie(ractl->mapping->host)->cache_priv; 4625 bool check_cache = caching; 4626 pid_t pid; 4627 int rc = 0; 4628 4629 /* Note that readahead_count() lags behind our dequeuing of pages from 4630 * the ractl, so we have to keep track for ourselves.
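 * (ra_pages and ra_index below are our own running counts of how much of the readahead window is still to be dispatched.)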
4631 */ 4632 ra_pages = readahead_count(ractl); 4633 ra_index = readahead_index(ractl); 4634 4635 xid = get_xid(); 4636 4637 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4638 pid = open_file->pid; 4639 else 4640 pid = current->tgid; 4641 4642 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4643 4644 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 4645 __func__, ractl->file, ractl->mapping, ra_pages); 4646 4647 /* 4648 * Chop the readahead request up into rsize-sized read requests. 4649 */ 4650 while ((nr_pages = ra_pages)) { 4651 unsigned int i, rsize; 4652 struct cifs_readdata *rdata; 4653 struct cifs_credits credits_on_stack; 4654 struct cifs_credits *credits = &credits_on_stack; 4655 struct folio *folio; 4656 pgoff_t fsize; 4657 4658 /* 4659 * Find out if we have anything cached in the range of 4660 * interest, and if so, where the next chunk of cached data is. 4661 */ 4662 if (caching) { 4663 if (check_cache) { 4664 rc = cifs_fscache_query_occupancy( 4665 ractl->mapping->host, ra_index, nr_pages, 4666 &next_cached, &cache_nr_pages); 4667 if (rc < 0) 4668 caching = false; 4669 check_cache = false; 4670 } 4671 4672 if (ra_index == next_cached) { 4673 /* 4674 * TODO: Send a whole batch of pages to be read 4675 * by the cache. 4676 */ 4677 folio = readahead_folio(ractl); 4678 fsize = folio_nr_pages(folio); 4679 ra_pages -= fsize; 4680 ra_index += fsize; 4681 if (cifs_readpage_from_fscache(ractl->mapping->host, 4682 &folio->page) < 0) { 4683 /* 4684 * TODO: Deal with cache read failure 4685 * here, but for the moment, delegate 4686 * that to readpage. 4687 */ 4688 caching = false; 4689 } 4690 folio_unlock(folio); 4691 next_cached += fsize; 4692 cache_nr_pages -= fsize; 4693 if (cache_nr_pages == 0) 4694 check_cache = true; 4695 continue; 4696 } 4697 } 4698 4699 if (open_file->invalidHandle) { 4700 rc = cifs_reopen_file(open_file, true); 4701 if (rc) { 4702 if (rc == -EAGAIN) 4703 continue; 4704 break; 4705 } 4706 } 4707 4708 if (cifs_sb->ctx->rsize == 0) 4709 cifs_sb->ctx->rsize = 4710 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4711 cifs_sb->ctx); 4712 4713 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4714 &rsize, credits); 4715 if (rc) 4716 break; 4717 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); 4718 if (next_cached != ULONG_MAX) 4719 nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); 4720 4721 /* 4722 * Give up immediately if rsize is too small to read an entire 4723 * page. The VFS will fall back to readpage. We should never 4724 * reach this point however since we set ra_pages to 0 when the 4725 * rsize is smaller than a cache page. 
4726 */ 4727 if (unlikely(!nr_pages)) { 4728 add_credits_and_wake_if(server, credits, 0); 4729 break; 4730 } 4731 4732 rdata = cifs_readdata_alloc(cifs_readahead_complete); 4733 if (!rdata) { 4734 /* best to give up if we're out of mem */ 4735 add_credits_and_wake_if(server, credits, 0); 4736 break; 4737 } 4738 4739 rdata->offset = ra_index * PAGE_SIZE; 4740 rdata->bytes = nr_pages * PAGE_SIZE; 4741 rdata->cfile = cifsFileInfo_get(open_file); 4742 rdata->server = server; 4743 rdata->mapping = ractl->mapping; 4744 rdata->pid = pid; 4745 rdata->credits = credits_on_stack; 4746 4747 for (i = 0; i < nr_pages; i++) { 4748 if (!readahead_folio(ractl)) 4749 WARN_ON(1); 4750 } 4751 ra_pages -= nr_pages; 4752 ra_index += nr_pages; 4753 4754 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, 4755 rdata->offset, rdata->bytes); 4756 4757 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4758 if (!rc) { 4759 if (rdata->cfile->invalidHandle) 4760 rc = -EAGAIN; 4761 else 4762 rc = server->ops->async_readv(rdata); 4763 } 4764 4765 if (rc) { 4766 add_credits_and_wake_if(server, &rdata->credits, 0); 4767 cifs_unlock_folios(rdata->mapping, 4768 rdata->offset / PAGE_SIZE, 4769 (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); 4770 /* Fallback to the readpage in error/reconnect cases */ 4771 kref_put(&rdata->refcount, cifs_readdata_release); 4772 break; 4773 } 4774 4775 kref_put(&rdata->refcount, cifs_readdata_release); 4776 } 4777 4778 free_xid(xid); 4779 } 4780 4781 /* 4782 * cifs_readpage_worker must be called with the page pinned 4783 */ 4784 static int cifs_readpage_worker(struct file *file, struct page *page, 4785 loff_t *poffset) 4786 { 4787 struct inode *inode = file_inode(file); 4788 struct timespec64 atime, mtime; 4789 char *read_data; 4790 int rc; 4791 4792 /* Is the page cached? 
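 * (A return of 0 from cifs_readpage_from_fscache() means the local cache supplied the data, so we skip the network read and just unlock the page.)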
*/ 4793 rc = cifs_readpage_from_fscache(inode, page); 4794 if (rc == 0) 4795 goto read_complete; 4796 4797 read_data = kmap(page); 4798 /* for reads over a certain size could initiate async read ahead */ 4799 4800 rc = cifs_read(file, read_data, PAGE_SIZE, poffset); 4801 4802 if (rc < 0) 4803 goto io_error; 4804 else 4805 cifs_dbg(FYI, "Bytes read %d\n", rc); 4806 4807 /* we do not want atime to be less than mtime, it broke some apps */ 4808 atime = inode_set_atime_to_ts(inode, current_time(inode)); 4809 mtime = inode_get_mtime(inode); 4810 if (timespec64_compare(&atime, &mtime) < 0) 4811 inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 4812 4813 if (PAGE_SIZE > rc) 4814 memset(read_data + rc, 0, PAGE_SIZE - rc); 4815 4816 flush_dcache_page(page); 4817 SetPageUptodate(page); 4818 rc = 0; 4819 4820 io_error: 4821 kunmap(page); 4822 4823 read_complete: 4824 unlock_page(page); 4825 return rc; 4826 } 4827 4828 static int cifs_read_folio(struct file *file, struct folio *folio) 4829 { 4830 struct page *page = &folio->page; 4831 loff_t offset = page_file_offset(page); 4832 int rc = -EACCES; 4833 unsigned int xid; 4834 4835 xid = get_xid(); 4836 4837 if (file->private_data == NULL) { 4838 rc = -EBADF; 4839 free_xid(xid); 4840 return rc; 4841 } 4842 4843 cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", 4844 page, (int)offset, (int)offset); 4845 4846 rc = cifs_readpage_worker(file, page, &offset); 4847 4848 free_xid(xid); 4849 return rc; 4850 } 4851 4852 static int is_inode_writable(struct cifsInodeInfo *cifs_inode) 4853 { 4854 struct cifsFileInfo *open_file; 4855 4856 spin_lock(&cifs_inode->open_file_lock); 4857 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 4858 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 4859 spin_unlock(&cifs_inode->open_file_lock); 4860 return 1; 4861 } 4862 } 4863 spin_unlock(&cifs_inode->open_file_lock); 4864 return 0; 4865 } 4866 4867 /* We do not want to update the file size from server for inodes 4868 open for write - to avoid races with writepage extending 4869 the file - in the future we could consider allowing 4870 refreshing the inode only on increases in the file size 4871 but this is tricky to do without racing with writebehind 4872 page caching in the current Linux kernel design */ 4873 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file, 4874 bool from_readdir) 4875 { 4876 if (!cifsInode) 4877 return true; 4878 4879 if (is_inode_writable(cifsInode) || 4880 ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) { 4881 /* This inode is open for write at least once */ 4882 struct cifs_sb_info *cifs_sb; 4883 4884 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); 4885 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 4886 /* since no page cache to corrupt on directio 4887 we can change size safely */ 4888 return true; 4889 } 4890 4891 if (i_size_read(&cifsInode->netfs.inode) < end_of_file) 4892 return true; 4893 4894 return false; 4895 } else 4896 return true; 4897 } 4898 4899 static int cifs_write_begin(struct file *file, struct address_space *mapping, 4900 loff_t pos, unsigned len, 4901 struct page **pagep, void **fsdata) 4902 { 4903 int oncethru = 0; 4904 pgoff_t index = pos >> PAGE_SHIFT; 4905 loff_t offset = pos & (PAGE_SIZE - 1); 4906 loff_t page_start = pos & PAGE_MASK; 4907 loff_t i_size; 4908 struct page *page; 4909 int rc = 0; 4910 4911 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); 4912 4913 start: 4914 page = grab_cache_page_write_begin(mapping, index); 4915 if 
(!page) { 4916 rc = -ENOMEM; 4917 goto out; 4918 } 4919 4920 if (PageUptodate(page)) 4921 goto out; 4922 4923 /* 4924 * If we write a full page it will be up to date, no need to read from 4925 * the server. If the write is short, we'll end up doing a sync write 4926 * instead. 4927 */ 4928 if (len == PAGE_SIZE) 4929 goto out; 4930 4931 /* 4932 * optimize away the read when we have an oplock, and we're not 4933 * expecting to use any of the data we'd be reading in. That 4934 * is, when the page lies beyond the EOF, or straddles the EOF 4935 * and the write will cover all of the existing data. 4936 */ 4937 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { 4938 i_size = i_size_read(mapping->host); 4939 if (page_start >= i_size || 4940 (offset == 0 && (pos + len) >= i_size)) { 4941 zero_user_segments(page, 0, offset, 4942 offset + len, 4943 PAGE_SIZE); 4944 /* 4945 * PageChecked means that the parts of the page 4946 * to which we're not writing are considered up 4947 * to date. Once the data is copied to the 4948 * page, it can be set uptodate. 4949 */ 4950 SetPageChecked(page); 4951 goto out; 4952 } 4953 } 4954 4955 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { 4956 /* 4957 * might as well read a page, it is fast enough. If we get 4958 * an error, we don't need to return it. cifs_write_end will 4959 * do a sync write instead since PG_uptodate isn't set. 4960 */ 4961 cifs_readpage_worker(file, page, &page_start); 4962 put_page(page); 4963 oncethru = 1; 4964 goto start; 4965 } else { 4966 /* we could try using another file handle if there is one - 4967 but how would we lock it to prevent close of that handle 4968 racing with this read? In any case 4969 this will be written out by write_end so is fine */ 4970 } 4971 out: 4972 *pagep = page; 4973 return rc; 4974 } 4975 4976 static bool cifs_release_folio(struct folio *folio, gfp_t gfp) 4977 { 4978 if (folio_test_private(folio)) 4979 return 0; 4980 if (folio_test_fscache(folio)) { 4981 if (current_is_kswapd() || !(gfp & __GFP_FS)) 4982 return false; 4983 folio_wait_fscache(folio); 4984 } 4985 fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); 4986 return true; 4987 } 4988 4989 static void cifs_invalidate_folio(struct folio *folio, size_t offset, 4990 size_t length) 4991 { 4992 folio_wait_fscache(folio); 4993 } 4994 4995 static int cifs_launder_folio(struct folio *folio) 4996 { 4997 int rc = 0; 4998 loff_t range_start = folio_pos(folio); 4999 loff_t range_end = range_start + folio_size(folio); 5000 struct writeback_control wbc = { 5001 .sync_mode = WB_SYNC_ALL, 5002 .nr_to_write = 0, 5003 .range_start = range_start, 5004 .range_end = range_end, 5005 }; 5006 5007 cifs_dbg(FYI, "Launder page: %lu\n", folio->index); 5008 5009 if (folio_clear_dirty_for_io(folio)) 5010 rc = cifs_writepage_locked(&folio->page, &wbc); 5011 5012 folio_wait_fscache(folio); 5013 return rc; 5014 } 5015 5016 void cifs_oplock_break(struct work_struct *work) 5017 { 5018 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 5019 oplock_break); 5020 struct inode *inode = d_inode(cfile->dentry); 5021 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 5022 struct cifsInodeInfo *cinode = CIFS_I(inode); 5023 struct cifs_tcon *tcon; 5024 struct TCP_Server_Info *server; 5025 struct tcon_link *tlink; 5026 int rc = 0; 5027 bool purge_cache = false, oplock_break_cancelled; 5028 __u64 persistent_fid, volatile_fid; 5029 __u16 net_fid; 5030 5031 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, 5032 TASK_UNINTERRUPTIBLE); 5033 5034 tlink = 
cifs_sb_tlink(cifs_sb); 5035 if (IS_ERR(tlink)) 5036 goto out; 5037 tcon = tlink_tcon(tlink); 5038 server = tcon->ses->server; 5039 5040 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level, 5041 cfile->oplock_epoch, &purge_cache); 5042 5043 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && 5044 cifs_has_mand_locks(cinode)) { 5045 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", 5046 inode); 5047 cinode->oplock = 0; 5048 } 5049 5050 if (inode && S_ISREG(inode->i_mode)) { 5051 if (CIFS_CACHE_READ(cinode)) 5052 break_lease(inode, O_RDONLY); 5053 else 5054 break_lease(inode, O_WRONLY); 5055 rc = filemap_fdatawrite(inode->i_mapping); 5056 if (!CIFS_CACHE_READ(cinode) || purge_cache) { 5057 rc = filemap_fdatawait(inode->i_mapping); 5058 mapping_set_error(inode->i_mapping, rc); 5059 cifs_zap_mapping(inode); 5060 } 5061 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc); 5062 if (CIFS_CACHE_WRITE(cinode)) 5063 goto oplock_break_ack; 5064 } 5065 5066 rc = cifs_push_locks(cfile); 5067 if (rc) 5068 cifs_dbg(VFS, "Push locks rc = %d\n", rc); 5069 5070 oplock_break_ack: 5071 /* 5072 * When an oplock break is received and the only remaining file handles 5073 * are cached for deferred close, schedule the deferred close immediately 5074 * so that a new open will not reuse the cached handle. 5075 */ 5076 5077 if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes)) 5078 cifs_close_deferred_file(cinode); 5079 5080 persistent_fid = cfile->fid.persistent_fid; 5081 volatile_fid = cfile->fid.volatile_fid; 5082 net_fid = cfile->fid.netfid; 5083 oplock_break_cancelled = cfile->oplock_break_cancelled; 5084 5085 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); 5086 /* 5087 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require 5088 * an acknowledgment to be sent when the file has already been closed. 5089 */ 5090 spin_lock(&cinode->open_file_lock); 5091 /* check list empty since can race with kill_sb calling tree disconnect */ 5092 if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) { 5093 spin_unlock(&cinode->open_file_lock); 5094 rc = server->ops->oplock_response(tcon, persistent_fid, 5095 volatile_fid, net_fid, cinode); 5096 cifs_dbg(FYI, "Oplock release rc = %d\n", rc); 5097 } else 5098 spin_unlock(&cinode->open_file_lock); 5099 5100 cifs_put_tlink(tlink); 5101 out: 5102 cifs_done_oplock_break(cinode); 5103 } 5104 5105 /* 5106 * The presence of cifs_direct_io() in the address space ops vector 5107 * allows open() with the O_DIRECT flag, which would have failed otherwise. 5108 * 5109 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests 5110 * so this method should never be called. 5111 * 5112 * Direct IO is not yet supported in the cached mode.
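 * (So in practice this entry point only serves to let the VFS O_DIRECT open-time check succeed; if it is ever reached on a cached mount it simply fails with -EINVAL.)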
5113 */ 5114 static ssize_t 5115 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) 5116 { 5117 /* 5118 * FIXME 5119 * Eventually need to support direct IO for non forcedirectio mounts 5120 */ 5121 return -EINVAL; 5122 } 5123 5124 static int cifs_swap_activate(struct swap_info_struct *sis, 5125 struct file *swap_file, sector_t *span) 5126 { 5127 struct cifsFileInfo *cfile = swap_file->private_data; 5128 struct inode *inode = swap_file->f_mapping->host; 5129 unsigned long blocks; 5130 long long isize; 5131 5132 cifs_dbg(FYI, "swap activate\n"); 5133 5134 if (!swap_file->f_mapping->a_ops->swap_rw) 5135 /* Cannot support swap */ 5136 return -EINVAL; 5137 5138 spin_lock(&inode->i_lock); 5139 blocks = inode->i_blocks; 5140 isize = inode->i_size; 5141 spin_unlock(&inode->i_lock); 5142 if (blocks*512 < isize) { 5143 pr_warn("swap activate: swapfile has holes\n"); 5144 return -EINVAL; 5145 } 5146 *span = sis->pages; 5147 5148 pr_warn_once("Swap support over SMB3 is experimental\n"); 5149 5150 /* 5151 * TODO: consider adding ACL (or documenting how) to prevent other 5152 * users (on this or other systems) from reading it 5153 */ 5154 5155 5156 /* TODO: add sk_set_memalloc(inet) or similar */ 5157 5158 if (cfile) 5159 cfile->swapfile = true; 5160 /* 5161 * TODO: Since file already open, we can't open with DENY_ALL here 5162 * but we could add call to grab a byte range lock to prevent others 5163 * from reading or writing the file 5164 */ 5165 5166 sis->flags |= SWP_FS_OPS; 5167 return add_swap_extent(sis, 0, sis->max, 0); 5168 } 5169 5170 static void cifs_swap_deactivate(struct file *file) 5171 { 5172 struct cifsFileInfo *cfile = file->private_data; 5173 5174 cifs_dbg(FYI, "swap deactivate\n"); 5175 5176 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ 5177 5178 if (cfile) 5179 cfile->swapfile = false; 5180 5181 /* do we need to unpin (or unlock) the file */ 5182 } 5183 5184 const struct address_space_operations cifs_addr_ops = { 5185 .read_folio = cifs_read_folio, 5186 .readahead = cifs_readahead, 5187 .writepages = cifs_writepages, 5188 .write_begin = cifs_write_begin, 5189 .write_end = cifs_write_end, 5190 .dirty_folio = netfs_dirty_folio, 5191 .release_folio = cifs_release_folio, 5192 .direct_IO = cifs_direct_io, 5193 .invalidate_folio = cifs_invalidate_folio, 5194 .launder_folio = cifs_launder_folio, 5195 .migrate_folio = filemap_migrate_folio, 5196 /* 5197 * TODO: investigate and if useful we could add an is_dirty_writeback 5198 * helper if needed 5199 */ 5200 .swap_activate = cifs_swap_activate, 5201 .swap_deactivate = cifs_swap_deactivate, 5202 }; 5203 5204 /* 5205 * cifs_readahead requires the server to support a buffer large enough to 5206 * contain the header plus one complete page of data. Otherwise, we need 5207 * to leave cifs_readahead out of the address space operations. 5208 */ 5209 const struct address_space_operations cifs_addr_ops_smallbuf = { 5210 .read_folio = cifs_read_folio, 5211 .writepages = cifs_writepages, 5212 .write_begin = cifs_write_begin, 5213 .write_end = cifs_write_end, 5214 .dirty_folio = netfs_dirty_folio, 5215 .release_folio = cifs_release_folio, 5216 .invalidate_folio = cifs_invalidate_folio, 5217 .launder_folio = cifs_launder_folio, 5218 .migrate_folio = filemap_migrate_folio, 5219 }; 5220