xref: /linux/fs/smb/client/file.c (revision 4b660dbd9ee2059850fd30e0df420ca7a38a1856)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
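
/*
 * A quick sketch of the index math used above (values illustrative only):
 * with PAGE_SIZE == 4096, a span of start = 8192 and len = 8192 begins the
 * XArray walk at index 8192 / 4096 = 2 and ends it at
 * (8192 + 8192 - 1) / 4096 = 3, i.e. exactly folios 2 and 3 are undirtied.
 */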
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark all open files on the tree connection as invalid, since they
167  * were closed when the session to the server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->need_reconnect)
179 		tcon->status = TID_NEED_RECON;
180 
181 	if (tcon->status != TID_NEED_RECON) {
182 		spin_unlock(&tcon->tc_lock);
183 		return;
184 	}
185 	tcon->status = TID_IN_FILES_INVALIDATE;
186 	spin_unlock(&tcon->tc_lock);
187 
188 	/* list all files open on tree connection and mark them invalid */
189 	spin_lock(&tcon->open_file_lock);
190 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 		open_file->invalidHandle = true;
193 		open_file->oplock_break_cancelled = true;
194 	}
195 	spin_unlock(&tcon->open_file_lock);
196 
197 	invalidate_all_cached_dirs(tcon);
198 	spin_lock(&tcon->tc_lock);
199 	if (tcon->status == TID_IN_FILES_INVALIDATE)
200 		tcon->status = TID_NEED_TCON;
201 	spin_unlock(&tcon->tc_lock);
202 
203 	/*
204 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 	 * to this tcon.
206 	 */
207 }
208 
209 static inline int cifs_convert_flags(unsigned int flags)
210 {
211 	if ((flags & O_ACCMODE) == O_RDONLY)
212 		return GENERIC_READ;
213 	else if ((flags & O_ACCMODE) == O_WRONLY)
214 		return GENERIC_WRITE;
215 	else if ((flags & O_ACCMODE) == O_RDWR) {
216 		/* GENERIC_ALL is too much permission to request; it
217 		   can cause an unnecessary access-denied error on create */
218 		/* return GENERIC_ALL; */
219 		return (GENERIC_READ | GENERIC_WRITE);
220 	}
221 
222 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 		FILE_READ_DATA);
225 }
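
/*
 * For illustration only (a sketch, not part of the protocol code): an
 * open(2) with O_RDWR maps to requesting GENERIC_READ | GENERIC_WRITE:
 *
 *	int access = cifs_convert_flags(O_RDWR);	/- GENERIC_READ | GENERIC_WRITE -/
 *	int ro_access = cifs_convert_flags(O_RDONLY);	/- GENERIC_READ -/
 */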
226 
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230 	u32 posix_flags = 0;
231 
232 	if ((flags & O_ACCMODE) == O_RDONLY)
233 		posix_flags = SMB_O_RDONLY;
234 	else if ((flags & O_ACCMODE) == O_WRONLY)
235 		posix_flags = SMB_O_WRONLY;
236 	else if ((flags & O_ACCMODE) == O_RDWR)
237 		posix_flags = SMB_O_RDWR;
238 
239 	if (flags & O_CREAT) {
240 		posix_flags |= SMB_O_CREAT;
241 		if (flags & O_EXCL)
242 			posix_flags |= SMB_O_EXCL;
243 	} else if (flags & O_EXCL)
244 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 			 current->comm, current->tgid);
246 
247 	if (flags & O_TRUNC)
248 		posix_flags |= SMB_O_TRUNC;
249 	/* be safe and imply O_SYNC for O_DSYNC */
250 	if (flags & O_DSYNC)
251 		posix_flags |= SMB_O_SYNC;
252 	if (flags & O_DIRECTORY)
253 		posix_flags |= SMB_O_DIRECTORY;
254 	if (flags & O_NOFOLLOW)
255 		posix_flags |= SMB_O_NOFOLLOW;
256 	if (flags & O_DIRECT)
257 		posix_flags |= SMB_O_DIRECT;
258 
259 	return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262 
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 		return FILE_CREATE;
267 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 		return FILE_OVERWRITE_IF;
269 	else if ((flags & O_CREAT) == O_CREAT)
270 		return FILE_OPEN_IF;
271 	else if ((flags & O_TRUNC) == O_TRUNC)
272 		return FILE_OVERWRITE;
273 	else
274 		return FILE_OPEN;
275 }
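
/*
 * Worked example of the disposition mapping (it mirrors the table further
 * below in cifs_nt_open); flag combinations shown are from open(2):
 *
 *	cifs_get_disposition(O_CREAT | O_EXCL)	-> FILE_CREATE
 *	cifs_get_disposition(O_CREAT | O_TRUNC)	-> FILE_OVERWRITE_IF
 *	cifs_get_disposition(O_CREAT)		-> FILE_OPEN_IF
 *	cifs_get_disposition(O_TRUNC)		-> FILE_OVERWRITE
 *	cifs_get_disposition(O_RDONLY)		-> FILE_OPEN
 */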
276 
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279 			struct super_block *sb, int mode, unsigned int f_flags,
280 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282 	int rc;
283 	FILE_UNIX_BASIC_INFO *presp_data;
284 	__u32 posix_flags = 0;
285 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 	struct cifs_fattr fattr;
287 	struct tcon_link *tlink;
288 	struct cifs_tcon *tcon;
289 
290 	cifs_dbg(FYI, "posix open %s\n", full_path);
291 
292 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 	if (presp_data == NULL)
294 		return -ENOMEM;
295 
296 	tlink = cifs_sb_tlink(cifs_sb);
297 	if (IS_ERR(tlink)) {
298 		rc = PTR_ERR(tlink);
299 		goto posix_open_ret;
300 	}
301 
302 	tcon = tlink_tcon(tlink);
303 	mode &= ~current_umask();
304 
305 	posix_flags = cifs_posix_convert_flags(f_flags);
306 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 			     poplock, full_path, cifs_sb->local_nls,
308 			     cifs_remap(cifs_sb));
309 	cifs_put_tlink(tlink);
310 
311 	if (rc)
312 		goto posix_open_ret;
313 
314 	if (presp_data->Type == cpu_to_le32(-1))
315 		goto posix_open_ret; /* open ok, caller does qpathinfo */
316 
317 	if (!pinode)
318 		goto posix_open_ret; /* caller does not need info */
319 
320 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321 
322 	/* get new inode and set it up */
323 	if (*pinode == NULL) {
324 		cifs_fill_uniqueid(sb, &fattr);
325 		*pinode = cifs_iget(sb, &fattr);
326 		if (!*pinode) {
327 			rc = -ENOMEM;
328 			goto posix_open_ret;
329 		}
330 	} else {
331 		cifs_revalidate_mapping(*pinode);
332 		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333 	}
334 
335 posix_open_ret:
336 	kfree(presp_data);
337 	return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340 
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345 	int rc;
346 	int desired_access;
347 	int disposition;
348 	int create_options = CREATE_NOT_DIR;
349 	struct TCP_Server_Info *server = tcon->ses->server;
350 	struct cifs_open_parms oparms;
351 
352 	if (!server->ops->open)
353 		return -ENOSYS;
354 
355 	desired_access = cifs_convert_flags(f_flags);
356 
357 /*********************************************************************
358  *  open flag mapping table:
359  *
360  *	POSIX Flag            CIFS Disposition
361  *	----------            ----------------
362  *	O_CREAT               FILE_OPEN_IF
363  *	O_CREAT | O_EXCL      FILE_CREATE
364  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
365  *	O_TRUNC               FILE_OVERWRITE
366  *	none of the above     FILE_OPEN
367  *
368  *	Note that there is no direct match for the disposition
369  *	FILE_SUPERSEDE (ie create whether or not the file exists).
370  *	O_CREAT | O_TRUNC is similar, but it truncates the existing
371  *	file rather than creating a new file as FILE_SUPERSEDE does
372  *	(which uses the attributes / metadata passed in on the open call).
373  *
374  *	O_SYNC is a reasonable match to the CIFS writethrough flag
375  *	and the read write flags match reasonably.  O_LARGEFILE
376  *	is irrelevant because largefile support is always used
377  *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
378  *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
379  *********************************************************************/
380 
381 	disposition = cifs_get_disposition(f_flags);
382 
383 	/* BB pass O_SYNC flag through on file attributes .. BB */
384 
385 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
386 	if (f_flags & O_SYNC)
387 		create_options |= CREATE_WRITE_THROUGH;
388 
389 	if (f_flags & O_DIRECT)
390 		create_options |= CREATE_NO_BUFFER;
391 
392 	oparms = (struct cifs_open_parms) {
393 		.tcon = tcon,
394 		.cifs_sb = cifs_sb,
395 		.desired_access = desired_access,
396 		.create_options = cifs_create_options(cifs_sb, create_options),
397 		.disposition = disposition,
398 		.path = full_path,
399 		.fid = fid,
400 	};
401 
402 	rc = server->ops->open(xid, &oparms, oplock, buf);
403 	if (rc)
404 		return rc;
405 
406 	/* TODO: Add support for calling posix query info but with passing in fid */
407 	if (tcon->unix_ext)
408 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
409 					      xid);
410 	else
411 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
412 					 xid, fid);
413 
414 	if (rc) {
415 		server->ops->close(xid, tcon, fid);
416 		if (rc == -ESTALE)
417 			rc = -EOPENSTALE;
418 	}
419 
420 	return rc;
421 }
422 
423 static bool
424 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
425 {
426 	struct cifs_fid_locks *cur;
427 	bool has_locks = false;
428 
429 	down_read(&cinode->lock_sem);
430 	list_for_each_entry(cur, &cinode->llist, llist) {
431 		if (!list_empty(&cur->locks)) {
432 			has_locks = true;
433 			break;
434 		}
435 	}
436 	up_read(&cinode->lock_sem);
437 	return has_locks;
438 }
439 
440 void
441 cifs_down_write(struct rw_semaphore *sem)
442 {
443 	while (!down_write_trylock(sem))
444 		msleep(10);
445 }
446 
447 static void cifsFileInfo_put_work(struct work_struct *work);
448 
449 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
450 				       struct tcon_link *tlink, __u32 oplock,
451 				       const char *symlink_target)
452 {
453 	struct dentry *dentry = file_dentry(file);
454 	struct inode *inode = d_inode(dentry);
455 	struct cifsInodeInfo *cinode = CIFS_I(inode);
456 	struct cifsFileInfo *cfile;
457 	struct cifs_fid_locks *fdlocks;
458 	struct cifs_tcon *tcon = tlink_tcon(tlink);
459 	struct TCP_Server_Info *server = tcon->ses->server;
460 
461 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
462 	if (cfile == NULL)
463 		return cfile;
464 
465 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
466 	if (!fdlocks) {
467 		kfree(cfile);
468 		return NULL;
469 	}
470 
471 	if (symlink_target) {
472 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
473 		if (!cfile->symlink_target) {
474 			kfree(fdlocks);
475 			kfree(cfile);
476 			return NULL;
477 		}
478 	}
479 
480 	INIT_LIST_HEAD(&fdlocks->locks);
481 	fdlocks->cfile = cfile;
482 	cfile->llist = fdlocks;
483 
484 	cfile->count = 1;
485 	cfile->pid = current->tgid;
486 	cfile->uid = current_fsuid();
487 	cfile->dentry = dget(dentry);
488 	cfile->f_flags = file->f_flags;
489 	cfile->invalidHandle = false;
490 	cfile->deferred_close_scheduled = false;
491 	cfile->tlink = cifs_get_tlink(tlink);
492 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
493 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
494 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
495 	mutex_init(&cfile->fh_mutex);
496 	spin_lock_init(&cfile->file_info_lock);
497 
498 	cifs_sb_active(inode->i_sb);
499 
500 	/*
501 	 * If the server returned a read oplock and we have mandatory brlocks,
502 	 * set oplock level to None.
503 	 */
504 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
505 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
506 		oplock = 0;
507 	}
508 
509 	cifs_down_write(&cinode->lock_sem);
510 	list_add(&fdlocks->llist, &cinode->llist);
511 	up_write(&cinode->lock_sem);
512 
513 	spin_lock(&tcon->open_file_lock);
514 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
515 		oplock = fid->pending_open->oplock;
516 	list_del(&fid->pending_open->olist);
517 
518 	fid->purge_cache = false;
519 	server->ops->set_fid(cfile, fid, oplock);
520 
521 	list_add(&cfile->tlist, &tcon->openFileList);
522 	atomic_inc(&tcon->num_local_opens);
523 
524 	/* if readable file instance, put it first in the list */
525 	spin_lock(&cinode->open_file_lock);
526 	if (file->f_mode & FMODE_READ)
527 		list_add(&cfile->flist, &cinode->openFileList);
528 	else
529 		list_add_tail(&cfile->flist, &cinode->openFileList);
530 	spin_unlock(&cinode->open_file_lock);
531 	spin_unlock(&tcon->open_file_lock);
532 
533 	if (fid->purge_cache)
534 		cifs_zap_mapping(inode);
535 
536 	file->private_data = cfile;
537 	return cfile;
538 }
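
/*
 * Typical call sequence (sketch based on cifs_open() below; error handling
 * omitted): the caller registers a pending open so a lease break is not
 * missed, opens the handle on the server, then wraps it in a cifsFileInfo:
 *
 *	cifs_add_pending_open(&fid, tlink, &open);
 *	rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags,
 *			  &oplock, &fid, xid, &data);
 *	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
 */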
539 
540 struct cifsFileInfo *
541 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
542 {
543 	spin_lock(&cifs_file->file_info_lock);
544 	cifsFileInfo_get_locked(cifs_file);
545 	spin_unlock(&cifs_file->file_info_lock);
546 	return cifs_file;
547 }
548 
549 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
550 {
551 	struct inode *inode = d_inode(cifs_file->dentry);
552 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
553 	struct cifsLockInfo *li, *tmp;
554 	struct super_block *sb = inode->i_sb;
555 
556 	/*
557 	 * Delete any outstanding lock records. We'll lose them when the file
558 	 * is closed anyway.
559 	 */
560 	cifs_down_write(&cifsi->lock_sem);
561 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
562 		list_del(&li->llist);
563 		cifs_del_lock_waiters(li);
564 		kfree(li);
565 	}
566 	list_del(&cifs_file->llist->llist);
567 	kfree(cifs_file->llist);
568 	up_write(&cifsi->lock_sem);
569 
570 	cifs_put_tlink(cifs_file->tlink);
571 	dput(cifs_file->dentry);
572 	cifs_sb_deactive(sb);
573 	kfree(cifs_file->symlink_target);
574 	kfree(cifs_file);
575 }
576 
577 static void cifsFileInfo_put_work(struct work_struct *work)
578 {
579 	struct cifsFileInfo *cifs_file = container_of(work,
580 			struct cifsFileInfo, put);
581 
582 	cifsFileInfo_put_final(cifs_file);
583 }
584 
585 /**
586  * cifsFileInfo_put - release a reference of file priv data
587  *
588  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
589  *
590  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
591  */
592 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
593 {
594 	_cifsFileInfo_put(cifs_file, true, true);
595 }
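
/*
 * Reference counting sketch: cifsFileInfo_get() takes an extra reference
 * under file_info_lock and cifsFileInfo_put() drops one; the final put
 * closes the handle on the server and frees the structure:
 *
 *	struct cifsFileInfo *ref = cifsFileInfo_get(cfile);
 *	/- use ref without fear of it being freed -/
 *	cifsFileInfo_put(ref);
 */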
596 
597 /**
598  * _cifsFileInfo_put - release a reference of file priv data
599  *
600  * This may involve closing the filehandle @cifs_file out on the
601  * server. Must be called without holding tcon->open_file_lock,
602  * cinode->open_file_lock and cifs_file->file_info_lock.
603  *
604  * If @wait_oplock_handler is true and we are releasing the last
605  * reference, wait for any running oplock break handler of the file
606  * and cancel any pending one.
607  *
608  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
609  * @wait_oplock_handler: must be false if called from oplock_break_handler
610  * @offload:	if true, queue the final release to a workqueue (not done on close and oplock breaks)
611  *
612  */
613 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
614 		       bool wait_oplock_handler, bool offload)
615 {
616 	struct inode *inode = d_inode(cifs_file->dentry);
617 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
618 	struct TCP_Server_Info *server = tcon->ses->server;
619 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
620 	struct super_block *sb = inode->i_sb;
621 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
622 	struct cifs_fid fid = {};
623 	struct cifs_pending_open open;
624 	bool oplock_break_cancelled;
625 
626 	spin_lock(&tcon->open_file_lock);
627 	spin_lock(&cifsi->open_file_lock);
628 	spin_lock(&cifs_file->file_info_lock);
629 	if (--cifs_file->count > 0) {
630 		spin_unlock(&cifs_file->file_info_lock);
631 		spin_unlock(&cifsi->open_file_lock);
632 		spin_unlock(&tcon->open_file_lock);
633 		return;
634 	}
635 	spin_unlock(&cifs_file->file_info_lock);
636 
637 	if (server->ops->get_lease_key)
638 		server->ops->get_lease_key(inode, &fid);
639 
640 	/* store open in pending opens to make sure we don't miss lease break */
641 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
642 
643 	/* remove it from the lists */
644 	list_del(&cifs_file->flist);
645 	list_del(&cifs_file->tlist);
646 	atomic_dec(&tcon->num_local_opens);
647 
648 	if (list_empty(&cifsi->openFileList)) {
649 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
650 			 d_inode(cifs_file->dentry));
651 		/*
652 		 * In strict cache mode we need to invalidate the mapping on the
653 		 * last close because it may cause an error when we open this
654 		 * file again and get at least a level II oplock.
655 		 */
656 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
657 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
658 		cifs_set_oplock_level(cifsi, 0);
659 	}
660 
661 	spin_unlock(&cifsi->open_file_lock);
662 	spin_unlock(&tcon->open_file_lock);
663 
664 	oplock_break_cancelled = wait_oplock_handler ?
665 		cancel_work_sync(&cifs_file->oplock_break) : false;
666 
667 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
668 		struct TCP_Server_Info *server = tcon->ses->server;
669 		unsigned int xid;
670 
671 		xid = get_xid();
672 		if (server->ops->close_getattr)
673 			server->ops->close_getattr(xid, tcon, cifs_file);
674 		else if (server->ops->close)
675 			server->ops->close(xid, tcon, &cifs_file->fid);
676 		_free_xid(xid);
677 	}
678 
679 	if (oplock_break_cancelled)
680 		cifs_done_oplock_break(cifsi);
681 
682 	cifs_del_pending_open(&open);
683 
684 	if (offload)
685 		queue_work(fileinfo_put_wq, &cifs_file->put);
686 	else
687 		cifsFileInfo_put_final(cifs_file);
688 }
689 
690 int cifs_open(struct inode *inode, struct file *file)
691 
692 {
693 	int rc = -EACCES;
694 	unsigned int xid;
695 	__u32 oplock;
696 	struct cifs_sb_info *cifs_sb;
697 	struct TCP_Server_Info *server;
698 	struct cifs_tcon *tcon;
699 	struct tcon_link *tlink;
700 	struct cifsFileInfo *cfile = NULL;
701 	void *page;
702 	const char *full_path;
703 	bool posix_open_ok = false;
704 	struct cifs_fid fid = {};
705 	struct cifs_pending_open open;
706 	struct cifs_open_info_data data = {};
707 
708 	xid = get_xid();
709 
710 	cifs_sb = CIFS_SB(inode->i_sb);
711 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
712 		free_xid(xid);
713 		return -EIO;
714 	}
715 
716 	tlink = cifs_sb_tlink(cifs_sb);
717 	if (IS_ERR(tlink)) {
718 		free_xid(xid);
719 		return PTR_ERR(tlink);
720 	}
721 	tcon = tlink_tcon(tlink);
722 	server = tcon->ses->server;
723 
724 	page = alloc_dentry_path();
725 	full_path = build_path_from_dentry(file_dentry(file), page);
726 	if (IS_ERR(full_path)) {
727 		rc = PTR_ERR(full_path);
728 		goto out;
729 	}
730 
731 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
732 		 inode, file->f_flags, full_path);
733 
734 	if (file->f_flags & O_DIRECT &&
735 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
736 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
737 			file->f_op = &cifs_file_direct_nobrl_ops;
738 		else
739 			file->f_op = &cifs_file_direct_ops;
740 	}
741 
742 	/* Get the cached handle as SMB2 close is deferred */
743 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
744 	if (rc == 0) {
745 		if (file->f_flags == cfile->f_flags) {
746 			file->private_data = cfile;
747 			spin_lock(&CIFS_I(inode)->deferred_lock);
748 			cifs_del_deferred_close(cfile);
749 			spin_unlock(&CIFS_I(inode)->deferred_lock);
750 			goto use_cache;
751 		} else {
752 			_cifsFileInfo_put(cfile, true, false);
753 		}
754 	}
755 
756 	if (server->oplocks)
757 		oplock = REQ_OPLOCK;
758 	else
759 		oplock = 0;
760 
761 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
762 	if (!tcon->broken_posix_open && tcon->unix_ext &&
763 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
764 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
765 		/* can not refresh inode info since size could be stale */
766 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
767 				cifs_sb->ctx->file_mode /* ignored */,
768 				file->f_flags, &oplock, &fid.netfid, xid);
769 		if (rc == 0) {
770 			cifs_dbg(FYI, "posix open succeeded\n");
771 			posix_open_ok = true;
772 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
773 			if (tcon->ses->serverNOS)
774 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
775 					 tcon->ses->ip_addr,
776 					 tcon->ses->serverNOS);
777 			tcon->broken_posix_open = true;
778 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
779 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
780 			goto out;
781 		/*
782 		 * Else fallthrough to retry open the old way on network i/o
783 		 * or DFS errors.
784 		 */
785 	}
786 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
787 
788 	if (server->ops->get_lease_key)
789 		server->ops->get_lease_key(inode, &fid);
790 
791 	cifs_add_pending_open(&fid, tlink, &open);
792 
793 	if (!posix_open_ok) {
794 		if (server->ops->get_lease_key)
795 			server->ops->get_lease_key(inode, &fid);
796 
797 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
798 				  xid, &data);
799 		if (rc) {
800 			cifs_del_pending_open(&open);
801 			goto out;
802 		}
803 	}
804 
805 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
806 	if (cfile == NULL) {
807 		if (server->ops->close)
808 			server->ops->close(xid, tcon, &fid);
809 		cifs_del_pending_open(&open);
810 		rc = -ENOMEM;
811 		goto out;
812 	}
813 
814 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
815 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
816 		/*
817 		 * Time to set the mode, which we could not set earlier due to
818 		 * problems creating new read-only files.
819 		 */
820 		struct cifs_unix_set_info_args args = {
821 			.mode	= inode->i_mode,
822 			.uid	= INVALID_UID, /* no change */
823 			.gid	= INVALID_GID, /* no change */
824 			.ctime	= NO_CHANGE_64,
825 			.atime	= NO_CHANGE_64,
826 			.mtime	= NO_CHANGE_64,
827 			.device	= 0,
828 		};
829 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
830 				       cfile->pid);
831 	}
832 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
833 
834 use_cache:
835 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
836 			   file->f_mode & FMODE_WRITE);
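	/*
	 * The check below reads: the file was opened O_DIRECT and either
	 * read-only (O_ACCMODE == O_RDONLY) or with O_APPEND set.
	 */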
837 	if (file->f_flags & O_DIRECT &&
838 	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
839 	     file->f_flags & O_APPEND))
840 		cifs_invalidate_cache(file_inode(file),
841 				      FSCACHE_INVAL_DIO_WRITE);
842 
843 out:
844 	free_dentry_path(page);
845 	free_xid(xid);
846 	cifs_put_tlink(tlink);
847 	cifs_free_open_info(&data);
848 	return rc;
849 }
850 
851 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
852 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
853 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
854 
855 /*
856  * Try to reacquire byte range locks that were released when session
857  * to server was lost.
858  */
859 static int
860 cifs_relock_file(struct cifsFileInfo *cfile)
861 {
862 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
863 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
864 	int rc = 0;
865 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
866 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
867 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
868 
869 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
870 	if (cinode->can_cache_brlcks) {
871 		/* can cache locks - no need to relock */
872 		up_read(&cinode->lock_sem);
873 		return rc;
874 	}
875 
876 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
877 	if (cap_unix(tcon->ses) &&
878 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
879 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
880 		rc = cifs_push_posix_locks(cfile);
881 	else
882 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
883 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
884 
885 	up_read(&cinode->lock_sem);
886 	return rc;
887 }
888 
889 static int
890 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
891 {
892 	int rc = -EACCES;
893 	unsigned int xid;
894 	__u32 oplock;
895 	struct cifs_sb_info *cifs_sb;
896 	struct cifs_tcon *tcon;
897 	struct TCP_Server_Info *server;
898 	struct cifsInodeInfo *cinode;
899 	struct inode *inode;
900 	void *page;
901 	const char *full_path;
902 	int desired_access;
903 	int disposition = FILE_OPEN;
904 	int create_options = CREATE_NOT_DIR;
905 	struct cifs_open_parms oparms;
906 
907 	xid = get_xid();
908 	mutex_lock(&cfile->fh_mutex);
909 	if (!cfile->invalidHandle) {
910 		mutex_unlock(&cfile->fh_mutex);
911 		free_xid(xid);
912 		return 0;
913 	}
914 
915 	inode = d_inode(cfile->dentry);
916 	cifs_sb = CIFS_SB(inode->i_sb);
917 	tcon = tlink_tcon(cfile->tlink);
918 	server = tcon->ses->server;
919 
920 	/*
921 	 * Can not grab the rename sem here because various ops, including
922 	 * those that already hold it, can end up causing writepage to get
923 	 * called; if the server was down, that means we end up here and can
924 	 * never tell whether the caller already holds the rename_sem.
925 	 */
926 	page = alloc_dentry_path();
927 	full_path = build_path_from_dentry(cfile->dentry, page);
928 	if (IS_ERR(full_path)) {
929 		mutex_unlock(&cfile->fh_mutex);
930 		free_dentry_path(page);
931 		free_xid(xid);
932 		return PTR_ERR(full_path);
933 	}
934 
935 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
936 		 inode, cfile->f_flags, full_path);
937 
938 	if (tcon->ses->server->oplocks)
939 		oplock = REQ_OPLOCK;
940 	else
941 		oplock = 0;
942 
943 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
944 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
945 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
946 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
947 		/*
948 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
949 		 * original open. Must mask them off for a reopen.
950 		 */
951 		unsigned int oflags = cfile->f_flags &
952 						~(O_CREAT | O_EXCL | O_TRUNC);
953 
954 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
955 				     cifs_sb->ctx->file_mode /* ignored */,
956 				     oflags, &oplock, &cfile->fid.netfid, xid);
957 		if (rc == 0) {
958 			cifs_dbg(FYI, "posix reopen succeeded\n");
959 			oparms.reconnect = true;
960 			goto reopen_success;
961 		}
962 		/*
963 		 * Fall through to retry the open the old way on errors;
964 		 * especially in the reconnect path it is important to retry hard.
965 		 */
966 	}
967 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
968 
969 	desired_access = cifs_convert_flags(cfile->f_flags);
970 
971 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
972 	if (cfile->f_flags & O_SYNC)
973 		create_options |= CREATE_WRITE_THROUGH;
974 
975 	if (cfile->f_flags & O_DIRECT)
976 		create_options |= CREATE_NO_BUFFER;
977 
978 	if (server->ops->get_lease_key)
979 		server->ops->get_lease_key(inode, &cfile->fid);
980 
981 	oparms = (struct cifs_open_parms) {
982 		.tcon = tcon,
983 		.cifs_sb = cifs_sb,
984 		.desired_access = desired_access,
985 		.create_options = cifs_create_options(cifs_sb, create_options),
986 		.disposition = disposition,
987 		.path = full_path,
988 		.fid = &cfile->fid,
989 		.reconnect = true,
990 	};
991 
992 	/*
993 	 * Can not refresh inode by passing in file_info buf to be returned by
994 	 * ops->open and then calling get_inode_info with returned buf since
995 	 * file might have write behind data that needs to be flushed and server
996 	 * version of file size can be stale. If we knew for sure that inode was
997 	 * not dirty locally we could do this.
998 	 */
999 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1000 	if (rc == -ENOENT && oparms.reconnect == false) {
1001 		/* durable handle timeout is expired - open the file again */
1002 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1003 		/* indicate that we need to relock the file */
1004 		oparms.reconnect = true;
1005 	}
1006 
1007 	if (rc) {
1008 		mutex_unlock(&cfile->fh_mutex);
1009 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1010 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1011 		goto reopen_error_exit;
1012 	}
1013 
1014 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1015 reopen_success:
1016 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1017 	cfile->invalidHandle = false;
1018 	mutex_unlock(&cfile->fh_mutex);
1019 	cinode = CIFS_I(inode);
1020 
1021 	if (can_flush) {
1022 		rc = filemap_write_and_wait(inode->i_mapping);
1023 		if (!is_interrupt_error(rc))
1024 			mapping_set_error(inode->i_mapping, rc);
1025 
1026 		if (tcon->posix_extensions) {
1027 			rc = smb311_posix_get_inode_info(&inode, full_path,
1028 							 NULL, inode->i_sb, xid);
1029 		} else if (tcon->unix_ext) {
1030 			rc = cifs_get_inode_info_unix(&inode, full_path,
1031 						      inode->i_sb, xid);
1032 		} else {
1033 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1034 						 inode->i_sb, xid, NULL);
1035 		}
1036 	}
1037 	/*
1038 	 * Else we are already writing out data to the server and could deadlock
1039 	 * if we tried to flush it. Since we do not know whether we have data
1040 	 * that would invalidate the current end of file on the server, we can
1041 	 * not go to the server to get the new inode info.
1042 	 */
1043 
1044 	/*
1045 	 * If the server returned a read oplock and we have mandatory brlocks,
1046 	 * set oplock level to None.
1047 	 */
1048 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1049 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1050 		oplock = 0;
1051 	}
1052 
1053 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1054 	if (oparms.reconnect)
1055 		cifs_relock_file(cfile);
1056 
1057 reopen_error_exit:
1058 	free_dentry_path(page);
1059 	free_xid(xid);
1060 	return rc;
1061 }
1062 
1063 void smb2_deferred_work_close(struct work_struct *work)
1064 {
1065 	struct cifsFileInfo *cfile = container_of(work,
1066 			struct cifsFileInfo, deferred.work);
1067 
1068 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1069 	cifs_del_deferred_close(cfile);
1070 	cfile->deferred_close_scheduled = false;
1071 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1072 	_cifsFileInfo_put(cfile, true, false);
1073 }
1074 
1075 static bool
1076 smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
1077 {
1078 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1079 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1080 
1081 	return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
1082 			(cinode->oplock == CIFS_CACHE_RHW_FLG ||
1083 			 cinode->oplock == CIFS_CACHE_RH_FLG) &&
1084 			!test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
1085 
1086 }
1087 
1088 int cifs_close(struct inode *inode, struct file *file)
1089 {
1090 	struct cifsFileInfo *cfile;
1091 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1092 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1093 	struct cifs_deferred_close *dclose;
1094 
1095 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1096 
1097 	if (file->private_data != NULL) {
1098 		cfile = file->private_data;
1099 		file->private_data = NULL;
1100 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1101 		if ((cfile->status_file_deleted == false) &&
1102 		    (smb2_can_defer_close(inode, dclose))) {
1103 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1104 				inode_set_mtime_to_ts(inode,
1105 						      inode_set_ctime_current(inode));
1106 			}
1107 			spin_lock(&cinode->deferred_lock);
1108 			cifs_add_deferred_close(cfile, dclose);
1109 			if (cfile->deferred_close_scheduled &&
1110 			    delayed_work_pending(&cfile->deferred)) {
1111 				/*
1112 				 * If there is no pending work, mod_delayed_work queues new work.
1113 				 * So, increase the ref count to avoid use-after-free.
1114 				 */
1115 				if (!mod_delayed_work(deferredclose_wq,
1116 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1117 					cifsFileInfo_get(cfile);
1118 			} else {
1119 				/* Deferred close for files */
1120 				queue_delayed_work(deferredclose_wq,
1121 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1122 				cfile->deferred_close_scheduled = true;
1123 				spin_unlock(&cinode->deferred_lock);
1124 				return 0;
1125 			}
1126 			spin_unlock(&cinode->deferred_lock);
1127 			_cifsFileInfo_put(cfile, true, false);
1128 		} else {
1129 			_cifsFileInfo_put(cfile, true, false);
1130 			kfree(dclose);
1131 		}
1132 	}
1133 
1134 	/* return code from the ->release op is always ignored */
1135 	return 0;
1136 }
1137 
1138 void
1139 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1140 {
1141 	struct cifsFileInfo *open_file, *tmp;
1142 	struct list_head tmp_list;
1143 
1144 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1145 		return;
1146 
1147 	tcon->need_reopen_files = false;
1148 
1149 	cifs_dbg(FYI, "Reopen persistent handles\n");
1150 	INIT_LIST_HEAD(&tmp_list);
1151 
1152 	/* list all files open on tree connection, reopen resilient handles  */
1153 	spin_lock(&tcon->open_file_lock);
1154 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1155 		if (!open_file->invalidHandle)
1156 			continue;
1157 		cifsFileInfo_get(open_file);
1158 		list_add_tail(&open_file->rlist, &tmp_list);
1159 	}
1160 	spin_unlock(&tcon->open_file_lock);
1161 
1162 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1163 		if (cifs_reopen_file(open_file, false /* do not flush */))
1164 			tcon->need_reopen_files = true;
1165 		list_del_init(&open_file->rlist);
1166 		cifsFileInfo_put(open_file);
1167 	}
1168 }
1169 
1170 int cifs_closedir(struct inode *inode, struct file *file)
1171 {
1172 	int rc = 0;
1173 	unsigned int xid;
1174 	struct cifsFileInfo *cfile = file->private_data;
1175 	struct cifs_tcon *tcon;
1176 	struct TCP_Server_Info *server;
1177 	char *buf;
1178 
1179 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1180 
1181 	if (cfile == NULL)
1182 		return rc;
1183 
1184 	xid = get_xid();
1185 	tcon = tlink_tcon(cfile->tlink);
1186 	server = tcon->ses->server;
1187 
1188 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1189 	spin_lock(&cfile->file_info_lock);
1190 	if (server->ops->dir_needs_close(cfile)) {
1191 		cfile->invalidHandle = true;
1192 		spin_unlock(&cfile->file_info_lock);
1193 		if (server->ops->close_dir)
1194 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1195 		else
1196 			rc = -ENOSYS;
1197 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1198 		/* not much we can do if it fails anyway, ignore rc */
1199 		rc = 0;
1200 	} else
1201 		spin_unlock(&cfile->file_info_lock);
1202 
1203 	buf = cfile->srch_inf.ntwrk_buf_start;
1204 	if (buf) {
1205 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1206 		cfile->srch_inf.ntwrk_buf_start = NULL;
1207 		if (cfile->srch_inf.smallBuf)
1208 			cifs_small_buf_release(buf);
1209 		else
1210 			cifs_buf_release(buf);
1211 	}
1212 
1213 	cifs_put_tlink(cfile->tlink);
1214 	kfree(file->private_data);
1215 	file->private_data = NULL;
1216 	/* BB can we lock the filestruct while this is going on? */
1217 	free_xid(xid);
1218 	return rc;
1219 }
1220 
1221 static struct cifsLockInfo *
1222 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1223 {
1224 	struct cifsLockInfo *lock =
1225 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1226 	if (!lock)
1227 		return lock;
1228 	lock->offset = offset;
1229 	lock->length = length;
1230 	lock->type = type;
1231 	lock->pid = current->tgid;
1232 	lock->flags = flags;
1233 	INIT_LIST_HEAD(&lock->blist);
1234 	init_waitqueue_head(&lock->block_q);
1235 	return lock;
1236 }
1237 
1238 void
1239 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1240 {
1241 	struct cifsLockInfo *li, *tmp;
1242 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1243 		list_del_init(&li->blist);
1244 		wake_up(&li->block_q);
1245 	}
1246 }
1247 
1248 #define CIFS_LOCK_OP	0
1249 #define CIFS_READ_OP	1
1250 #define CIFS_WRITE_OP	2
1251 
1252 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1253 static bool
1254 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1255 			    __u64 length, __u8 type, __u16 flags,
1256 			    struct cifsFileInfo *cfile,
1257 			    struct cifsLockInfo **conf_lock, int rw_check)
1258 {
1259 	struct cifsLockInfo *li;
1260 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1261 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1262 
1263 	list_for_each_entry(li, &fdlocks->locks, llist) {
1264 		if (offset + length <= li->offset ||
1265 		    offset >= li->offset + li->length)
1266 			continue;
1267 		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1268 		    server->ops->compare_fids(cfile, cur_cfile)) {
1269 			/* shared lock prevents write op through the same fid */
1270 			if (!(li->type & server->vals->shared_lock_type) ||
1271 			    rw_check != CIFS_WRITE_OP)
1272 				continue;
1273 		}
1274 		if ((type & server->vals->shared_lock_type) &&
1275 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1276 		     current->tgid == li->pid) || type == li->type))
1277 			continue;
1278 		if (rw_check == CIFS_LOCK_OP &&
1279 		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1280 		    server->ops->compare_fids(cfile, cur_cfile))
1281 			continue;
1282 		if (conf_lock)
1283 			*conf_lock = li;
1284 		return true;
1285 	}
1286 	return false;
1287 }
1288 
1289 bool
1290 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1291 			__u8 type, __u16 flags,
1292 			struct cifsLockInfo **conf_lock, int rw_check)
1293 {
1294 	bool rc = false;
1295 	struct cifs_fid_locks *cur;
1296 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1297 
1298 	list_for_each_entry(cur, &cinode->llist, llist) {
1299 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1300 						 flags, cfile, conf_lock,
1301 						 rw_check);
1302 		if (rc)
1303 			break;
1304 	}
1305 
1306 	return rc;
1307 }
1308 
1309 /*
1310  * Check if there is another lock that prevents us from setting the lock
1311  * (mandatory style). If such a lock exists, update the flock structure with
1312  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1313  * brlocks or leave it the same if we can't. Returns 0 if we don't need to
1314  * send a request to the server, or 1 otherwise.
1315  */
1316 static int
1317 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1318 	       __u8 type, struct file_lock *flock)
1319 {
1320 	int rc = 0;
1321 	struct cifsLockInfo *conf_lock;
1322 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1323 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1324 	bool exist;
1325 
1326 	down_read(&cinode->lock_sem);
1327 
1328 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1329 					flock->c.flc_flags, &conf_lock,
1330 					CIFS_LOCK_OP);
1331 	if (exist) {
1332 		flock->fl_start = conf_lock->offset;
1333 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1334 		flock->c.flc_pid = conf_lock->pid;
1335 		if (conf_lock->type & server->vals->shared_lock_type)
1336 			flock->c.flc_type = F_RDLCK;
1337 		else
1338 			flock->c.flc_type = F_WRLCK;
1339 	} else if (!cinode->can_cache_brlcks)
1340 		rc = 1;
1341 	else
1342 		flock->c.flc_type = F_UNLCK;
1343 
1344 	up_read(&cinode->lock_sem);
1345 	return rc;
1346 }
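
/*
 * Usage sketch for the return convention above: a caller testing a range
 * only needs to go to the server when local information is insufficient:
 *
 *	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
 *	if (!rc)
 *		return rc;	/- answered from the local lock list -/
 *	/- rc == 1: fall through and ask the server (see cifs_getlk()) -/
 */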
1347 
1348 static void
1349 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1350 {
1351 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1352 	cifs_down_write(&cinode->lock_sem);
1353 	list_add_tail(&lock->llist, &cfile->llist->locks);
1354 	up_write(&cinode->lock_sem);
1355 }
1356 
1357 /*
1358  * Set the byte-range lock (mandatory style). Returns:
1359  * 1) 0, if we set the lock and don't need to request to the server;
1360  * 2) 1, if no locks prevent us but we need to request to the server;
1361  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1362  */
1363 static int
1364 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1365 		 bool wait)
1366 {
1367 	struct cifsLockInfo *conf_lock;
1368 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1369 	bool exist;
1370 	int rc = 0;
1371 
1372 try_again:
1373 	exist = false;
1374 	cifs_down_write(&cinode->lock_sem);
1375 
1376 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1377 					lock->type, lock->flags, &conf_lock,
1378 					CIFS_LOCK_OP);
1379 	if (!exist && cinode->can_cache_brlcks) {
1380 		list_add_tail(&lock->llist, &cfile->llist->locks);
1381 		up_write(&cinode->lock_sem);
1382 		return rc;
1383 	}
1384 
1385 	if (!exist)
1386 		rc = 1;
1387 	else if (!wait)
1388 		rc = -EACCES;
1389 	else {
1390 		list_add_tail(&lock->blist, &conf_lock->blist);
1391 		up_write(&cinode->lock_sem);
1392 		rc = wait_event_interruptible(lock->block_q,
1393 					(lock->blist.prev == &lock->blist) &&
1394 					(lock->blist.next == &lock->blist));
1395 		if (!rc)
1396 			goto try_again;
1397 		cifs_down_write(&cinode->lock_sem);
1398 		list_del_init(&lock->blist);
1399 	}
1400 
1401 	up_write(&cinode->lock_sem);
1402 	return rc;
1403 }
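
/*
 * Sketch of the three outcomes documented above, as seen by a caller
 * (variable names here are illustrative):
 *
 *	rc = cifs_lock_add_if(cfile, lock, wait_flag);
 *	if (rc < 0)		/- -EACCES: conflicting lock and wait == false -/
 *		return rc;
 *	if (rc == 0)		/- lock recorded locally, nothing to send -/
 *		return 0;
 *	/- rc == 1: no local conflict, but the server must be asked -/
 */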
1404 
1405 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1406 /*
1407  * Check if there is another lock that prevents us from setting the lock
1408  * (posix style). If such a lock exists, update the flock structure with
1409  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1410  * brlocks or leave it the same if we can't. Returns 0 if we don't need to
1411  * send a request to the server, or 1 otherwise.
1412  */
1413 static int
1414 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1415 {
1416 	int rc = 0;
1417 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1418 	unsigned char saved_type = flock->c.flc_type;
1419 
1420 	if ((flock->c.flc_flags & FL_POSIX) == 0)
1421 		return 1;
1422 
1423 	down_read(&cinode->lock_sem);
1424 	posix_test_lock(file, flock);
1425 
1426 	if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) {
1427 		flock->c.flc_type = saved_type;
1428 		rc = 1;
1429 	}
1430 
1431 	up_read(&cinode->lock_sem);
1432 	return rc;
1433 }
1434 
1435 /*
1436  * Set the byte-range lock (posix style). Returns:
1437  * 1) <0, if the error occurs while setting the lock;
1438  * 2) 0, if we set the lock and don't need to request to the server;
1439  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1440  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1441  */
1442 static int
1443 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1444 {
1445 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1446 	int rc = FILE_LOCK_DEFERRED + 1;
1447 
1448 	if ((flock->c.flc_flags & FL_POSIX) == 0)
1449 		return rc;
1450 
1451 	cifs_down_write(&cinode->lock_sem);
1452 	if (!cinode->can_cache_brlcks) {
1453 		up_write(&cinode->lock_sem);
1454 		return rc;
1455 	}
1456 
1457 	rc = posix_lock_file(file, flock, NULL);
1458 	up_write(&cinode->lock_sem);
1459 	return rc;
1460 }
1461 
1462 int
1463 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1464 {
1465 	unsigned int xid;
1466 	int rc = 0, stored_rc;
1467 	struct cifsLockInfo *li, *tmp;
1468 	struct cifs_tcon *tcon;
1469 	unsigned int num, max_num, max_buf;
1470 	LOCKING_ANDX_RANGE *buf, *cur;
1471 	static const int types[] = {
1472 		LOCKING_ANDX_LARGE_FILES,
1473 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1474 	};
1475 	int i;
1476 
1477 	xid = get_xid();
1478 	tcon = tlink_tcon(cfile->tlink);
1479 
1480 	/*
1481 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1482 	 * and check it before using.
1483 	 */
1484 	max_buf = tcon->ses->server->maxBuf;
1485 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1486 		free_xid(xid);
1487 		return -EINVAL;
1488 	}
1489 
1490 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1491 		     PAGE_SIZE);
1492 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1493 			PAGE_SIZE);
1494 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1495 						sizeof(LOCKING_ANDX_RANGE);
1496 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1497 	if (!buf) {
1498 		free_xid(xid);
1499 		return -ENOMEM;
1500 	}
1501 
1502 	for (i = 0; i < 2; i++) {
1503 		cur = buf;
1504 		num = 0;
1505 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1506 			if (li->type != types[i])
1507 				continue;
1508 			cur->Pid = cpu_to_le16(li->pid);
1509 			cur->LengthLow = cpu_to_le32((u32)li->length);
1510 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1511 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1512 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1513 			if (++num == max_num) {
1514 				stored_rc = cifs_lockv(xid, tcon,
1515 						       cfile->fid.netfid,
1516 						       (__u8)li->type, 0, num,
1517 						       buf);
1518 				if (stored_rc)
1519 					rc = stored_rc;
1520 				cur = buf;
1521 				num = 0;
1522 			} else
1523 				cur++;
1524 		}
1525 
1526 		if (num) {
1527 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1528 					       (__u8)types[i], 0, num, buf);
1529 			if (stored_rc)
1530 				rc = stored_rc;
1531 		}
1532 	}
1533 
1534 	kfree(buf);
1535 	free_xid(xid);
1536 	return rc;
1537 }
1538 
1539 static __u32
1540 hash_lockowner(fl_owner_t owner)
1541 {
1542 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1543 }
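
/*
 * Note: the POSIX lock owner (an opaque pointer) is folded into a 32-bit
 * value here and sent in place of a pid, e.g. in cifs_push_posix_locks():
 *
 *	lck->pid = hash_lockowner(flock->c.flc_owner);
 */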
1544 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1545 
1546 struct lock_to_push {
1547 	struct list_head llist;
1548 	__u64 offset;
1549 	__u64 length;
1550 	__u32 pid;
1551 	__u16 netfid;
1552 	__u8 type;
1553 };
1554 
1555 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1556 static int
1557 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1558 {
1559 	struct inode *inode = d_inode(cfile->dentry);
1560 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1561 	struct file_lock *flock;
1562 	struct file_lock_context *flctx = locks_inode_context(inode);
1563 	unsigned int count = 0, i;
1564 	int rc = 0, xid, type;
1565 	struct list_head locks_to_send, *el;
1566 	struct lock_to_push *lck, *tmp;
1567 	__u64 length;
1568 
1569 	xid = get_xid();
1570 
1571 	if (!flctx)
1572 		goto out;
1573 
1574 	spin_lock(&flctx->flc_lock);
1575 	list_for_each(el, &flctx->flc_posix) {
1576 		count++;
1577 	}
1578 	spin_unlock(&flctx->flc_lock);
1579 
1580 	INIT_LIST_HEAD(&locks_to_send);
1581 
1582 	/*
1583 	 * Allocating count locks is enough because no FL_POSIX locks can be
1584 	 * added to the list while we hold cinode->lock_sem, which
1585 	 * protects locking operations on this inode.
1586 	 */
1587 	for (i = 0; i < count; i++) {
1588 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1589 		if (!lck) {
1590 			rc = -ENOMEM;
1591 			goto err_out;
1592 		}
1593 		list_add_tail(&lck->llist, &locks_to_send);
1594 	}
1595 
1596 	el = locks_to_send.next;
1597 	spin_lock(&flctx->flc_lock);
1598 	for_each_file_lock(flock, &flctx->flc_posix) {
1599 		unsigned char ftype = flock->c.flc_type;
1600 
1601 		if (el == &locks_to_send) {
1602 			/*
1603 			 * The list ended. We don't have enough allocated
1604 			 * structures - something is really wrong.
1605 			 */
1606 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1607 			break;
1608 		}
1609 		length = cifs_flock_len(flock);
1610 		if (ftype == F_RDLCK || ftype == F_SHLCK)
1611 			type = CIFS_RDLCK;
1612 		else
1613 			type = CIFS_WRLCK;
1614 		lck = list_entry(el, struct lock_to_push, llist);
1615 		lck->pid = hash_lockowner(flock->c.flc_owner);
1616 		lck->netfid = cfile->fid.netfid;
1617 		lck->length = length;
1618 		lck->type = type;
1619 		lck->offset = flock->fl_start;
1620 	}
1621 	spin_unlock(&flctx->flc_lock);
1622 
1623 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1624 		int stored_rc;
1625 
1626 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1627 					     lck->offset, lck->length, NULL,
1628 					     lck->type, 0);
1629 		if (stored_rc)
1630 			rc = stored_rc;
1631 		list_del(&lck->llist);
1632 		kfree(lck);
1633 	}
1634 
1635 out:
1636 	free_xid(xid);
1637 	return rc;
1638 err_out:
1639 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1640 		list_del(&lck->llist);
1641 		kfree(lck);
1642 	}
1643 	goto out;
1644 }
1645 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1646 
1647 static int
1648 cifs_push_locks(struct cifsFileInfo *cfile)
1649 {
1650 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1651 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1652 	int rc = 0;
1653 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1654 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1655 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1656 
1657 	/* we are going to update can_cache_brlcks here - need write access */
1658 	cifs_down_write(&cinode->lock_sem);
1659 	if (!cinode->can_cache_brlcks) {
1660 		up_write(&cinode->lock_sem);
1661 		return rc;
1662 	}
1663 
1664 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1665 	if (cap_unix(tcon->ses) &&
1666 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1667 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1668 		rc = cifs_push_posix_locks(cfile);
1669 	else
1670 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1671 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1672 
1673 	cinode->can_cache_brlcks = false;
1674 	up_write(&cinode->lock_sem);
1675 	return rc;
1676 }
1677 
1678 static void
1679 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1680 		bool *wait_flag, struct TCP_Server_Info *server)
1681 {
1682 	if (flock->c.flc_flags & FL_POSIX)
1683 		cifs_dbg(FYI, "Posix\n");
1684 	if (flock->c.flc_flags & FL_FLOCK)
1685 		cifs_dbg(FYI, "Flock\n");
1686 	if (flock->c.flc_flags & FL_SLEEP) {
1687 		cifs_dbg(FYI, "Blocking lock\n");
1688 		*wait_flag = true;
1689 	}
1690 	if (flock->c.flc_flags & FL_ACCESS)
1691 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1692 	if (flock->c.flc_flags & FL_LEASE)
1693 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1694 	if (flock->c.flc_flags &
1695 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1696 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1697 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n",
1698 		         flock->c.flc_flags);
1699 
1700 	*type = server->vals->large_lock_type;
1701 	if (lock_is_write(flock)) {
1702 		cifs_dbg(FYI, "F_WRLCK\n");
1703 		*type |= server->vals->exclusive_lock_type;
1704 		*lock = 1;
1705 	} else if (lock_is_unlock(flock)) {
1706 		cifs_dbg(FYI, "F_UNLCK\n");
1707 		*type |= server->vals->unlock_lock_type;
1708 		*unlock = 1;
1709 		/* Check if unlock includes more than one lock range */
1710 	} else if (lock_is_read(flock)) {
1711 		cifs_dbg(FYI, "F_RDLCK\n");
1712 		*type |= server->vals->shared_lock_type;
1713 		*lock = 1;
1714 	} else if (flock->c.flc_type == F_EXLCK) {
1715 		cifs_dbg(FYI, "F_EXLCK\n");
1716 		*type |= server->vals->exclusive_lock_type;
1717 		*lock = 1;
1718 	} else if (flock->c.flc_type == F_SHLCK) {
1719 		cifs_dbg(FYI, "F_SHLCK\n");
1720 		*type |= server->vals->shared_lock_type;
1721 		*lock = 1;
1722 	} else
1723 		cifs_dbg(FYI, "Unknown type of lock\n");
1724 }
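
/*
 * Example of the resulting mapping (a sketch; the actual bit values come
 * from server->vals and differ between SMB1 and SMB2/3):
 *
 *	F_WRLCK  -> large_lock_type | exclusive_lock_type, *lock = 1
 *	F_RDLCK  -> large_lock_type | shared_lock_type,    *lock = 1
 *	F_UNLCK  -> large_lock_type | unlock_lock_type,    *unlock = 1
 */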
1725 
1726 static int
1727 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1728 	   bool wait_flag, bool posix_lck, unsigned int xid)
1729 {
1730 	int rc = 0;
1731 	__u64 length = cifs_flock_len(flock);
1732 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1733 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1734 	struct TCP_Server_Info *server = tcon->ses->server;
1735 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1736 	__u16 netfid = cfile->fid.netfid;
1737 
1738 	if (posix_lck) {
1739 		int posix_lock_type;
1740 
1741 		rc = cifs_posix_lock_test(file, flock);
1742 		if (!rc)
1743 			return rc;
1744 
1745 		if (type & server->vals->shared_lock_type)
1746 			posix_lock_type = CIFS_RDLCK;
1747 		else
1748 			posix_lock_type = CIFS_WRLCK;
1749 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1750 				      hash_lockowner(flock->c.flc_owner),
1751 				      flock->fl_start, length, flock,
1752 				      posix_lock_type, wait_flag);
1753 		return rc;
1754 	}
1755 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1756 
1757 	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1758 	if (!rc)
1759 		return rc;
1760 
1761 	/* BB we could chain these into one lock request BB */
1762 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1763 				    1, 0, false);
1764 	if (rc == 0) {
1765 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1766 					    type, 0, 1, false);
1767 		flock->c.flc_type = F_UNLCK;
1768 		if (rc != 0)
1769 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1770 				 rc);
1771 		return 0;
1772 	}
1773 
1774 	if (type & server->vals->shared_lock_type) {
1775 		flock->c.flc_type = F_WRLCK;
1776 		return 0;
1777 	}
1778 
1779 	type &= ~server->vals->exclusive_lock_type;
1780 
1781 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1782 				    type | server->vals->shared_lock_type,
1783 				    1, 0, false);
1784 	if (rc == 0) {
1785 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1786 			type | server->vals->shared_lock_type, 0, 1, false);
1787 		flock->c.flc_type = F_RDLCK;
1788 		if (rc != 0)
1789 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1790 				 rc);
1791 	} else
1792 		flock->c.flc_type = F_WRLCK;
1793 
1794 	return 0;
1795 }
1796 
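/* Move every lock entry from @source onto @dest. */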
1797 void
1798 cifs_move_llist(struct list_head *source, struct list_head *dest)
1799 {
1800 	struct list_head *li, *tmp;
1801 	list_for_each_safe(li, tmp, source)
1802 		list_move(li, dest);
1803 }
1804 
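/* Free every cifsLockInfo on @llist, waking any waiters first. */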
1805 void
1806 cifs_free_llist(struct list_head *llist)
1807 {
1808 	struct cifsLockInfo *li, *tmp;
1809 	list_for_each_entry_safe(li, tmp, llist, llist) {
1810 		cifs_del_lock_waiters(li);
1811 		list_del(&li->llist);
1812 		kfree(li);
1813 	}
1814 }
1815 
1816 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
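/*
 * Unlock every cached byte-range lock that falls inside @flock, batching up
 * to max_num ranges per LOCKING_ANDX request. Locks are moved to a temporary
 * list first so they can be restored if the server rejects the unlock.
 */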
1817 int
1818 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1819 		  unsigned int xid)
1820 {
1821 	int rc = 0, stored_rc;
1822 	static const int types[] = {
1823 		LOCKING_ANDX_LARGE_FILES,
1824 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1825 	};
1826 	unsigned int i;
1827 	unsigned int max_num, num, max_buf;
1828 	LOCKING_ANDX_RANGE *buf, *cur;
1829 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1830 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1831 	struct cifsLockInfo *li, *tmp;
1832 	__u64 length = cifs_flock_len(flock);
1833 	struct list_head tmp_llist;
1834 
1835 	INIT_LIST_HEAD(&tmp_llist);
1836 
1837 	/*
1838 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1839 	 * and check it before using.
1840 	 */
1841 	max_buf = tcon->ses->server->maxBuf;
1842 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1843 		return -EINVAL;
1844 
1845 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1846 		     PAGE_SIZE);
1847 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1848 			PAGE_SIZE);
1849 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1850 						sizeof(LOCKING_ANDX_RANGE);
1851 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1852 	if (!buf)
1853 		return -ENOMEM;
1854 
1855 	cifs_down_write(&cinode->lock_sem);
1856 	for (i = 0; i < 2; i++) {
1857 		cur = buf;
1858 		num = 0;
1859 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1860 			if (flock->fl_start > li->offset ||
1861 			    (flock->fl_start + length) <
1862 			    (li->offset + li->length))
1863 				continue;
1864 			if (current->tgid != li->pid)
1865 				continue;
1866 			if (types[i] != li->type)
1867 				continue;
1868 			if (cinode->can_cache_brlcks) {
1869 				/*
1870 				 * We can cache brlock requests - simply remove
1871 				 * a lock from the file's list.
1872 				 */
1873 				list_del(&li->llist);
1874 				cifs_del_lock_waiters(li);
1875 				kfree(li);
1876 				continue;
1877 			}
1878 			cur->Pid = cpu_to_le16(li->pid);
1879 			cur->LengthLow = cpu_to_le32((u32)li->length);
1880 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1881 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1882 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1883 			/*
1884 			 * We need to save a lock here to let us add it again to
1885 			 * the file's list if the unlock range request fails on
1886 			 * the server.
1887 			 */
1888 			list_move(&li->llist, &tmp_llist);
1889 			if (++num == max_num) {
1890 				stored_rc = cifs_lockv(xid, tcon,
1891 						       cfile->fid.netfid,
1892 						       li->type, num, 0, buf);
1893 				if (stored_rc) {
1894 					/*
1895 					 * We failed on the unlock range
1896 					 * request - add all locks from the tmp
1897 					 * list to the head of the file's list.
1898 					 */
1899 					cifs_move_llist(&tmp_llist,
1900 							&cfile->llist->locks);
1901 					rc = stored_rc;
1902 				} else
1903 					/*
1904 					 * The unlock range request succeeded -
1905 					 * free the tmp list.
1906 					 */
1907 					cifs_free_llist(&tmp_llist);
1908 				cur = buf;
1909 				num = 0;
1910 			} else
1911 				cur++;
1912 		}
1913 		if (num) {
1914 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1915 					       types[i], num, 0, buf);
1916 			if (stored_rc) {
1917 				cifs_move_llist(&tmp_llist,
1918 						&cfile->llist->locks);
1919 				rc = stored_rc;
1920 			} else
1921 				cifs_free_llist(&tmp_llist);
1922 		}
1923 	}
1924 
1925 	up_write(&cinode->lock_sem);
1926 	kfree(buf);
1927 	return rc;
1928 }
1929 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1930 
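/*
 * Set or clear a byte-range lock. Legacy POSIX locks are sent via
 * CIFSSMBPosixLock(); otherwise a mandatory lock is either cached locally or
 * sent to the server, and unlocks go through mand_unlock_range(). For
 * FL_POSIX/FL_FLOCK requests the result is also recorded with the VFS via
 * locks_lock_file_wait().
 */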
1931 static int
1932 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1933 	   bool wait_flag, bool posix_lck, int lock, int unlock,
1934 	   unsigned int xid)
1935 {
1936 	int rc = 0;
1937 	__u64 length = cifs_flock_len(flock);
1938 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1939 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1940 	struct TCP_Server_Info *server = tcon->ses->server;
1941 	struct inode *inode = d_inode(cfile->dentry);
1942 
1943 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1944 	if (posix_lck) {
1945 		int posix_lock_type;
1946 
1947 		rc = cifs_posix_lock_set(file, flock);
1948 		if (rc <= FILE_LOCK_DEFERRED)
1949 			return rc;
1950 
1951 		if (type & server->vals->shared_lock_type)
1952 			posix_lock_type = CIFS_RDLCK;
1953 		else
1954 			posix_lock_type = CIFS_WRLCK;
1955 
1956 		if (unlock == 1)
1957 			posix_lock_type = CIFS_UNLCK;
1958 
1959 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1960 				      hash_lockowner(flock->c.flc_owner),
1961 				      flock->fl_start, length,
1962 				      NULL, posix_lock_type, wait_flag);
1963 		goto out;
1964 	}
1965 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1966 	if (lock) {
1967 		struct cifsLockInfo *lock;
1968 
1969 		lock = cifs_lock_init(flock->fl_start, length, type,
1970 				      flock->c.flc_flags);
1971 		if (!lock)
1972 			return -ENOMEM;
1973 
1974 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
1975 		if (rc < 0) {
1976 			kfree(lock);
1977 			return rc;
1978 		}
1979 		if (!rc)
1980 			goto out;
1981 
1982 		/*
1983 		 * A Windows 7 server can delay breaking a lease from read to None
1984 		 * if we set a byte-range lock on a file - break it explicitly
1985 		 * before sending the lock to the server to be sure the next
1986 		 * read won't conflict with non-overlapping locks due to
1987 		 * page reads.
1988 		 */
1989 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1990 					CIFS_CACHE_READ(CIFS_I(inode))) {
1991 			cifs_zap_mapping(inode);
1992 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1993 				 inode);
1994 			CIFS_I(inode)->oplock = 0;
1995 		}
1996 
1997 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1998 					    type, 1, 0, wait_flag);
1999 		if (rc) {
2000 			kfree(lock);
2001 			return rc;
2002 		}
2003 
2004 		cifs_lock_add(cfile, lock);
2005 	} else if (unlock)
2006 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
2007 
2008 out:
2009 	if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) {
2010 		/*
2011 		 * If this is a request to remove all locks because we
2012 		 * are closing the file, it doesn't matter if the
2013 		 * unlocking failed as both cifs.ko and the SMB server
2014 		 * remove the lock on file close
2015 		 */
2016 		if (rc) {
2017 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2018 			if (!(flock->c.flc_flags & FL_CLOSE))
2019 				return rc;
2020 		}
2021 		rc = locks_lock_file_wait(file, flock);
2022 	}
2023 	return rc;
2024 }
2025 
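/*
 * flock(2) entry point: only FL_FLOCK requests are accepted; the request is
 * decoded by cifs_read_flock() and applied via cifs_setlk().
 */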
2026 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2027 {
2028 	int rc, xid;
2029 	int lock = 0, unlock = 0;
2030 	bool wait_flag = false;
2031 	bool posix_lck = false;
2032 	struct cifs_sb_info *cifs_sb;
2033 	struct cifs_tcon *tcon;
2034 	struct cifsFileInfo *cfile;
2035 	__u32 type;
2036 
2037 	xid = get_xid();
2038 
2039 	if (!(fl->c.flc_flags & FL_FLOCK)) {
2040 		rc = -ENOLCK;
2041 		free_xid(xid);
2042 		return rc;
2043 	}
2044 
2045 	cfile = (struct cifsFileInfo *)file->private_data;
2046 	tcon = tlink_tcon(cfile->tlink);
2047 
2048 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2049 			tcon->ses->server);
2050 	cifs_sb = CIFS_FILE_SB(file);
2051 
2052 	if (cap_unix(tcon->ses) &&
2053 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2054 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2055 		posix_lck = true;
2056 
2057 	if (!lock && !unlock) {
2058 		/*
2059 		 * if no lock or unlock then nothing to do since we do not
2060 		 * know what it is
2061 		 */
2062 		rc = -EOPNOTSUPP;
2063 		free_xid(xid);
2064 		return rc;
2065 	}
2066 
2067 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2068 			xid);
2069 	free_xid(xid);
2070 	return rc;
2073 }
2074 
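/*
 * fcntl() byte-range lock entry point: F_GETLK requests are answered via
 * cifs_getlk(), F_SETLK/F_SETLKW requests via cifs_setlk().
 */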
2075 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2076 {
2077 	int rc, xid;
2078 	int lock = 0, unlock = 0;
2079 	bool wait_flag = false;
2080 	bool posix_lck = false;
2081 	struct cifs_sb_info *cifs_sb;
2082 	struct cifs_tcon *tcon;
2083 	struct cifsFileInfo *cfile;
2084 	__u32 type;
2085 
2086 	rc = -EACCES;
2087 	xid = get_xid();
2088 
2089 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2090 		 flock->c.flc_type, flock->c.flc_flags,
2091 		 (long long)flock->fl_start,
2092 		 (long long)flock->fl_end);
2093 
2094 	cfile = (struct cifsFileInfo *)file->private_data;
2095 	tcon = tlink_tcon(cfile->tlink);
2096 
2097 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2098 			tcon->ses->server);
2099 	cifs_sb = CIFS_FILE_SB(file);
2100 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2101 
2102 	if (cap_unix(tcon->ses) &&
2103 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2104 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2105 		posix_lck = true;
2106 	/*
2107 	 * BB add code here to normalize offset and length to account for
2108 	 * negative length which we cannot accept over the wire.
2109 	 */
2110 	if (IS_GETLK(cmd)) {
2111 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2112 		free_xid(xid);
2113 		return rc;
2114 	}
2115 
2116 	if (!lock && !unlock) {
2117 		/*
2118 		 * if no lock or unlock then nothing to do since we do not
2119 		 * know what it is
2120 		 */
2121 		free_xid(xid);
2122 		return -EOPNOTSUPP;
2123 	}
2124 
2125 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2126 			xid);
2127 	free_xid(xid);
2128 	return rc;
2129 }
2130 
2131 /*
2132  * update the file size (if needed) after a write. Should be called with
2133  * the inode->i_lock held
2134  */
2135 void
2136 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2137 		      unsigned int bytes_written)
2138 {
2139 	loff_t end_of_write = offset + bytes_written;
2140 
2141 	if (end_of_write > cifsi->netfs.remote_i_size)
2142 		netfs_resize_file(&cifsi->netfs, end_of_write, true);
2143 }
2144 
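/*
 * Synchronously write @write_size bytes from @write_data at *@offset using
 * the server's sync_write op, splitting the data into chunks no larger than
 * wp_retry_size() and reopening an invalidated handle as needed. Updates the
 * cached EOF and i_size and returns the number of bytes written (or an error
 * if nothing was written).
 */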
2145 static ssize_t
2146 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2147 	   size_t write_size, loff_t *offset)
2148 {
2149 	int rc = 0;
2150 	unsigned int bytes_written = 0;
2151 	unsigned int total_written;
2152 	struct cifs_tcon *tcon;
2153 	struct TCP_Server_Info *server;
2154 	unsigned int xid;
2155 	struct dentry *dentry = open_file->dentry;
2156 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2157 	struct cifs_io_parms io_parms = {0};
2158 
2159 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2160 		 write_size, *offset, dentry);
2161 
2162 	tcon = tlink_tcon(open_file->tlink);
2163 	server = tcon->ses->server;
2164 
2165 	if (!server->ops->sync_write)
2166 		return -ENOSYS;
2167 
2168 	xid = get_xid();
2169 
2170 	for (total_written = 0; write_size > total_written;
2171 	     total_written += bytes_written) {
2172 		rc = -EAGAIN;
2173 		while (rc == -EAGAIN) {
2174 			struct kvec iov[2];
2175 			unsigned int len;
2176 
2177 			if (open_file->invalidHandle) {
2178 				/* we could deadlock if we called
2179 				   filemap_fdatawait from here so tell
2180 				   reopen_file not to flush data to
2181 				   server now */
2182 				rc = cifs_reopen_file(open_file, false);
2183 				if (rc != 0)
2184 					break;
2185 			}
2186 
2187 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2188 				  (unsigned int)write_size - total_written);
2189 			/* iov[0] is reserved for smb header */
2190 			iov[1].iov_base = (char *)write_data + total_written;
2191 			iov[1].iov_len = len;
2192 			io_parms.pid = pid;
2193 			io_parms.tcon = tcon;
2194 			io_parms.offset = *offset;
2195 			io_parms.length = len;
2196 			rc = server->ops->sync_write(xid, &open_file->fid,
2197 					&io_parms, &bytes_written, iov, 1);
2198 		}
2199 		if (rc || (bytes_written == 0)) {
2200 			if (total_written)
2201 				break;
2202 			else {
2203 				free_xid(xid);
2204 				return rc;
2205 			}
2206 		} else {
2207 			spin_lock(&d_inode(dentry)->i_lock);
2208 			cifs_update_eof(cifsi, *offset, bytes_written);
2209 			spin_unlock(&d_inode(dentry)->i_lock);
2210 			*offset += bytes_written;
2211 		}
2212 	}
2213 
2214 	cifs_stats_bytes_written(tcon, total_written);
2215 
2216 	if (total_written > 0) {
2217 		spin_lock(&d_inode(dentry)->i_lock);
2218 		if (*offset > d_inode(dentry)->i_size) {
2219 			i_size_write(d_inode(dentry), *offset);
2220 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2221 		}
2222 		spin_unlock(&d_inode(dentry)->i_lock);
2223 	}
2224 	mark_inode_dirty_sync(d_inode(dentry));
2225 	free_xid(xid);
2226 	return total_written;
2227 }
2228 
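/*
 * Return the first valid open handle with read access for this inode
 * (restricted to the caller's fsuid on multiuser mounts), with a reference
 * held, or NULL if none is found.
 */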
2229 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2230 					bool fsuid_only)
2231 {
2232 	struct cifsFileInfo *open_file = NULL;
2233 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2234 
2235 	/* only filter by fsuid on multiuser mounts */
2236 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2237 		fsuid_only = false;
2238 
2239 	spin_lock(&cifs_inode->open_file_lock);
2240 	/* we could simply get the first_list_entry since write-only entries
2241 	   are always at the end of the list but since the first entry might
2242 	   have a close pending, we go through the whole list */
2243 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2244 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2245 			continue;
2246 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2247 			if (!open_file->invalidHandle) {
2248 				/* found a good file */
2249 				/* lock it so it will not be closed on us */
2250 				cifsFileInfo_get(open_file);
2251 				spin_unlock(&cifs_inode->open_file_lock);
2252 				return open_file;
2253 			} /* else might as well continue, and look for
2254 			     another, or simply have the caller reopen it
2255 			     again rather than trying to fix this handle */
2256 		} else /* write only file */
2257 			break; /* write only files are last so must be done */
2258 	}
2259 	spin_unlock(&cifs_inode->open_file_lock);
2260 	return NULL;
2261 }
2262 
2263 /* Return -EBADF if no handle is found and general rc otherwise */
2264 int
2265 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2266 		       struct cifsFileInfo **ret_file)
2267 {
2268 	struct cifsFileInfo *open_file, *inv_file = NULL;
2269 	struct cifs_sb_info *cifs_sb;
2270 	bool any_available = false;
2271 	int rc = -EBADF;
2272 	unsigned int refind = 0;
2273 	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2274 	bool with_delete = flags & FIND_WR_WITH_DELETE;
2275 	*ret_file = NULL;
2276 
2277 	/*
2278 	 * Having a null inode here (because mapping->host was set to zero by
2279 	 * the VFS or MM) should not happen but we had reports of an oops (due
2280 	 * to it being zero) during stress test cases so we need to check for it
2281 	 */
2282 
2283 	if (cifs_inode == NULL) {
2284 		cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2285 		dump_stack();
2286 		return rc;
2287 	}
2288 
2289 	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2290 
2291 	/* only filter by fsuid on multiuser mounts */
2292 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2293 		fsuid_only = false;
2294 
2295 	spin_lock(&cifs_inode->open_file_lock);
2296 refind_writable:
2297 	if (refind > MAX_REOPEN_ATT) {
2298 		spin_unlock(&cifs_inode->open_file_lock);
2299 		return rc;
2300 	}
2301 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2302 		if (!any_available && open_file->pid != current->tgid)
2303 			continue;
2304 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2305 			continue;
2306 		if (with_delete && !(open_file->fid.access & DELETE))
2307 			continue;
2308 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2309 			if (!open_file->invalidHandle) {
2310 				/* found a good writable file */
2311 				cifsFileInfo_get(open_file);
2312 				spin_unlock(&cifs_inode->open_file_lock);
2313 				*ret_file = open_file;
2314 				return 0;
2315 			} else {
2316 				if (!inv_file)
2317 					inv_file = open_file;
2318 			}
2319 		}
2320 	}
2321 	/* couldn't find usable FH with same pid, try any available */
2322 	if (!any_available) {
2323 		any_available = true;
2324 		goto refind_writable;
2325 	}
2326 
2327 	if (inv_file) {
2328 		any_available = false;
2329 		cifsFileInfo_get(inv_file);
2330 	}
2331 
2332 	spin_unlock(&cifs_inode->open_file_lock);
2333 
2334 	if (inv_file) {
2335 		rc = cifs_reopen_file(inv_file, false);
2336 		if (!rc) {
2337 			*ret_file = inv_file;
2338 			return 0;
2339 		}
2340 
2341 		spin_lock(&cifs_inode->open_file_lock);
2342 		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2343 		spin_unlock(&cifs_inode->open_file_lock);
2344 		cifsFileInfo_put(inv_file);
2345 		++refind;
2346 		inv_file = NULL;
2347 		spin_lock(&cifs_inode->open_file_lock);
2348 		goto refind_writable;
2349 	}
2350 
2351 	return rc;
2352 }
2353 
2354 struct cifsFileInfo *
2355 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2356 {
2357 	struct cifsFileInfo *cfile;
2358 	int rc;
2359 
2360 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2361 	if (rc)
2362 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2363 
2364 	return cfile;
2365 }
2366 
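/*
 * Find an already-open file on @tcon whose path matches @name and get a
 * writable handle for its inode; returns -ENOENT if no open file matches.
 */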
2367 int
2368 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2369 		       int flags,
2370 		       struct cifsFileInfo **ret_file)
2371 {
2372 	struct cifsFileInfo *cfile;
2373 	void *page = alloc_dentry_path();
2374 
2375 	*ret_file = NULL;
2376 
2377 	spin_lock(&tcon->open_file_lock);
2378 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2379 		struct cifsInodeInfo *cinode;
2380 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2381 		if (IS_ERR(full_path)) {
2382 			spin_unlock(&tcon->open_file_lock);
2383 			free_dentry_path(page);
2384 			return PTR_ERR(full_path);
2385 		}
2386 		if (strcmp(full_path, name))
2387 			continue;
2388 
2389 		cinode = CIFS_I(d_inode(cfile->dentry));
2390 		spin_unlock(&tcon->open_file_lock);
2391 		free_dentry_path(page);
2392 		return cifs_get_writable_file(cinode, flags, ret_file);
2393 	}
2394 
2395 	spin_unlock(&tcon->open_file_lock);
2396 	free_dentry_path(page);
2397 	return -ENOENT;
2398 }
2399 
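/*
 * Find an already-open file on @tcon whose path matches @name and get a
 * readable handle for its inode; returns -ENOENT if no open file matches.
 */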
2400 int
2401 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2402 		       struct cifsFileInfo **ret_file)
2403 {
2404 	struct cifsFileInfo *cfile;
2405 	void *page = alloc_dentry_path();
2406 
2407 	*ret_file = NULL;
2408 
2409 	spin_lock(&tcon->open_file_lock);
2410 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2411 		struct cifsInodeInfo *cinode;
2412 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2413 		if (IS_ERR(full_path)) {
2414 			spin_unlock(&tcon->open_file_lock);
2415 			free_dentry_path(page);
2416 			return PTR_ERR(full_path);
2417 		}
2418 		if (strcmp(full_path, name))
2419 			continue;
2420 
2421 		cinode = CIFS_I(d_inode(cfile->dentry));
2422 		spin_unlock(&tcon->open_file_lock);
2423 		free_dentry_path(page);
2424 		*ret_file = find_readable_file(cinode, 0);
2425 		return *ret_file ? 0 : -ENOENT;
2426 	}
2427 
2428 	spin_unlock(&tcon->open_file_lock);
2429 	free_dentry_path(page);
2430 	return -ENOENT;
2431 }
2432 
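/*
 * kref release for a cifs_writedata: deregister any SMB-Direct memory
 * registration, drop the reference on the open file and free the structure.
 */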
2433 void
2434 cifs_writedata_release(struct kref *refcount)
2435 {
2436 	struct cifs_writedata *wdata = container_of(refcount,
2437 					struct cifs_writedata, refcount);
2438 #ifdef CONFIG_CIFS_SMB_DIRECT
2439 	if (wdata->mr) {
2440 		smbd_deregister_mr(wdata->mr);
2441 		wdata->mr = NULL;
2442 	}
2443 #endif
2444 
2445 	if (wdata->cfile)
2446 		cifsFileInfo_put(wdata->cfile);
2447 
2448 	kfree(wdata);
2449 }
2450 
2451 /*
2452  * Write failed with a retryable error. Resend the write request. It's also
2453  * possible that the page was redirtied so re-clean the page.
2454  */
2455 static void
2456 cifs_writev_requeue(struct cifs_writedata *wdata)
2457 {
2458 	int rc = 0;
2459 	struct inode *inode = d_inode(wdata->cfile->dentry);
2460 	struct TCP_Server_Info *server;
2461 	unsigned int rest_len = wdata->bytes;
2462 	loff_t fpos = wdata->offset;
2463 
2464 	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2465 	do {
2466 		struct cifs_writedata *wdata2;
2467 		unsigned int wsize, cur_len;
2468 
2469 		wsize = server->ops->wp_retry_size(inode);
2470 		if (wsize < rest_len) {
2471 			if (wsize < PAGE_SIZE) {
2472 				rc = -EOPNOTSUPP;
2473 				break;
2474 			}
2475 			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2476 		} else {
2477 			cur_len = rest_len;
2478 		}
2479 
2480 		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2481 		if (!wdata2) {
2482 			rc = -ENOMEM;
2483 			break;
2484 		}
2485 
2486 		wdata2->sync_mode = wdata->sync_mode;
2487 		wdata2->offset	= fpos;
2488 		wdata2->bytes	= cur_len;
2489 		wdata2->iter	= wdata->iter;
2490 
2491 		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2492 		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2493 
2494 		if (iov_iter_is_xarray(&wdata2->iter))
2495 			/* Check for pages having been redirtied and clean
2496 			 * them.  We can do this by walking the xarray.  If
2497 			 * it's not an xarray, then it's a DIO and we shouldn't
2498 			 * be mucking around with the page bits.
2499 			 */
2500 			cifs_undirty_folios(inode, fpos, cur_len);
2501 
2502 		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2503 					    &wdata2->cfile);
2504 		if (!wdata2->cfile) {
2505 			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2506 				 rc);
2507 			if (!is_retryable_error(rc))
2508 				rc = -EBADF;
2509 		} else {
2510 			wdata2->pid = wdata2->cfile->pid;
2511 			rc = server->ops->async_writev(wdata2,
2512 						       cifs_writedata_release);
2513 		}
2514 
2515 		kref_put(&wdata2->refcount, cifs_writedata_release);
2516 		if (rc) {
2517 			if (is_retryable_error(rc))
2518 				continue;
2519 			fpos += cur_len;
2520 			rest_len -= cur_len;
2521 			break;
2522 		}
2523 
2524 		fpos += cur_len;
2525 		rest_len -= cur_len;
2526 	} while (rest_len > 0);
2527 
2528 	/* Clean up remaining pages from the original wdata */
2529 	if (iov_iter_is_xarray(&wdata->iter))
2530 		cifs_pages_write_failed(inode, fpos, rest_len);
2531 
2532 	if (rc != 0 && !is_retryable_error(rc))
2533 		mapping_set_error(inode->i_mapping, rc);
2534 	kref_put(&wdata->refcount, cifs_writedata_release);
2535 }
2536 
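/*
 * Work item run when an async writeback write completes: on success update
 * the cached EOF and write stats; requeue on -EAGAIN for WB_SYNC_ALL
 * writeback; otherwise redirty, fail or finish the affected pages and record
 * any error against the mapping.
 */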
2537 void
2538 cifs_writev_complete(struct work_struct *work)
2539 {
2540 	struct cifs_writedata *wdata = container_of(work,
2541 						struct cifs_writedata, work);
2542 	struct inode *inode = d_inode(wdata->cfile->dentry);
2543 
2544 	if (wdata->result == 0) {
2545 		spin_lock(&inode->i_lock);
2546 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2547 		spin_unlock(&inode->i_lock);
2548 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2549 					 wdata->bytes);
2550 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2551 		return cifs_writev_requeue(wdata);
2552 
2553 	if (wdata->result == -EAGAIN)
2554 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2555 	else if (wdata->result < 0)
2556 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2557 	else
2558 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2559 
2560 	if (wdata->result != -EAGAIN)
2561 		mapping_set_error(inode->i_mapping, wdata->result);
2562 	kref_put(&wdata->refcount, cifs_writedata_release);
2563 }
2564 
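/*
 * Allocate a zeroed cifs_writedata and initialise its refcount, list head,
 * completion and work item (run as @complete).
 */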
2565 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2566 {
2567 	struct cifs_writedata *wdata;
2568 
2569 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2570 	if (wdata != NULL) {
2571 		kref_init(&wdata->refcount);
2572 		INIT_LIST_HEAD(&wdata->list);
2573 		init_completion(&wdata->done);
2574 		INIT_WORK(&wdata->work, complete);
2575 	}
2576 	return wdata;
2577 }
2578 
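/*
 * Write the byte range [from, to) of @page back to the server through any
 * writable handle for the inode. The range is clamped so the write never
 * extends the file, and a racing truncate is tolerated.
 */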
2579 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2580 {
2581 	struct address_space *mapping = page->mapping;
2582 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2583 	char *write_data;
2584 	int rc = -EFAULT;
2585 	int bytes_written = 0;
2586 	struct inode *inode;
2587 	struct cifsFileInfo *open_file;
2588 
2589 	if (!mapping || !mapping->host)
2590 		return -EFAULT;
2591 
2592 	inode = page->mapping->host;
2593 
2594 	offset += (loff_t)from;
2595 	write_data = kmap(page);
2596 	write_data += from;
2597 
2598 	if ((to > PAGE_SIZE) || (from > to)) {
2599 		kunmap(page);
2600 		return -EIO;
2601 	}
2602 
2603 	/* racing with truncate? */
2604 	if (offset > mapping->host->i_size) {
2605 		kunmap(page);
2606 		return 0; /* don't care */
2607 	}
2608 
2609 	/* check to make sure that we are not extending the file */
2610 	if (mapping->host->i_size - offset < (loff_t)to)
2611 		to = (unsigned)(mapping->host->i_size - offset);
2612 
2613 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2614 				    &open_file);
2615 	if (!rc) {
2616 		bytes_written = cifs_write(open_file, open_file->pid,
2617 					   write_data, to - from, &offset);
2618 		cifsFileInfo_put(open_file);
2619 		/* Does mm or vfs already set times? */
2620 		simple_inode_init_ts(inode);
2621 		if ((bytes_written > 0) && (offset))
2622 			rc = 0;
2623 		else if (bytes_written < 0)
2624 			rc = bytes_written;
2625 		else
2626 			rc = -EFAULT;
2627 	} else {
2628 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2629 		if (!is_retryable_error(rc))
2630 			rc = -EIO;
2631 	}
2632 
2633 	kunmap(page);
2634 	return rc;
2635 }
2636 
2637 /*
2638  * Extend the region to be written back to include subsequent contiguously
2639  * dirty pages if possible, but don't sleep while doing so.
2640  */
2641 static void cifs_extend_writeback(struct address_space *mapping,
2642 				  struct xa_state *xas,
2643 				  long *_count,
2644 				  loff_t start,
2645 				  int max_pages,
2646 				  loff_t max_len,
2647 				  size_t *_len)
2648 {
2649 	struct folio_batch batch;
2650 	struct folio *folio;
2651 	unsigned int nr_pages;
2652 	pgoff_t index = (start + *_len) / PAGE_SIZE;
2653 	size_t len;
2654 	bool stop = true;
2655 	unsigned int i;
2656 
2657 	folio_batch_init(&batch);
2658 
2659 	do {
2660 		/* Firstly, we gather up a batch of contiguous dirty pages
2661 		 * under the RCU read lock - but we can't clear the dirty flags
2662 		 * there if any of those pages are mapped.
2663 		 */
2664 		rcu_read_lock();
2665 
2666 		xas_for_each(xas, folio, ULONG_MAX) {
2667 			stop = true;
2668 			if (xas_retry(xas, folio))
2669 				continue;
2670 			if (xa_is_value(folio))
2671 				break;
2672 			if (folio->index != index) {
2673 				xas_reset(xas);
2674 				break;
2675 			}
2676 
2677 			if (!folio_try_get_rcu(folio)) {
2678 				xas_reset(xas);
2679 				continue;
2680 			}
2681 			nr_pages = folio_nr_pages(folio);
2682 			if (nr_pages > max_pages) {
2683 				xas_reset(xas);
2684 				break;
2685 			}
2686 
2687 			/* Has the page moved or been split? */
2688 			if (unlikely(folio != xas_reload(xas))) {
2689 				folio_put(folio);
2690 				xas_reset(xas);
2691 				break;
2692 			}
2693 
2694 			if (!folio_trylock(folio)) {
2695 				folio_put(folio);
2696 				xas_reset(xas);
2697 				break;
2698 			}
2699 			if (!folio_test_dirty(folio) ||
2700 			    folio_test_writeback(folio)) {
2701 				folio_unlock(folio);
2702 				folio_put(folio);
2703 				xas_reset(xas);
2704 				break;
2705 			}
2706 
2707 			max_pages -= nr_pages;
2708 			len = folio_size(folio);
2709 			stop = false;
2710 
2711 			index += nr_pages;
2712 			*_count -= nr_pages;
2713 			*_len += len;
2714 			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2715 				stop = true;
2716 
2717 			if (!folio_batch_add(&batch, folio))
2718 				break;
2719 			if (stop)
2720 				break;
2721 		}
2722 
2723 		xas_pause(xas);
2724 		rcu_read_unlock();
2725 
2726 		/* Now, if we obtained any folios, we can clear their dirty flags
2727 		 * and mark them for writeback.
2728 		 */
2729 		if (!folio_batch_count(&batch))
2730 			break;
2731 
2732 		for (i = 0; i < folio_batch_count(&batch); i++) {
2733 			folio = batch.folios[i];
2734 			/* The folio should be locked, dirty and not undergoing
2735 			 * writeback from the loop above.
2736 			 */
2737 			if (!folio_clear_dirty_for_io(folio))
2738 				WARN_ON(1);
2739 			folio_start_writeback(folio);
2740 			folio_unlock(folio);
2741 		}
2742 
2743 		folio_batch_release(&batch);
2744 		cond_resched();
2745 	} while (!stop);
2746 }
2747 
2748 /*
2749  * Write back the locked page and any subsequent non-locked dirty pages.
2750  */
2751 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2752 						 struct writeback_control *wbc,
2753 						 struct xa_state *xas,
2754 						 struct folio *folio,
2755 						 unsigned long long start,
2756 						 unsigned long long end)
2757 {
2758 	struct inode *inode = mapping->host;
2759 	struct TCP_Server_Info *server;
2760 	struct cifs_writedata *wdata;
2761 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2762 	struct cifs_credits credits_on_stack;
2763 	struct cifs_credits *credits = &credits_on_stack;
2764 	struct cifsFileInfo *cfile = NULL;
2765 	unsigned long long i_size = i_size_read(inode), max_len;
2766 	unsigned int xid, wsize;
2767 	size_t len = folio_size(folio);
2768 	long count = wbc->nr_to_write;
2769 	int rc;
2770 
2771 	/* The folio should be locked, dirty and not undergoing writeback. */
2772 	if (!folio_clear_dirty_for_io(folio))
2773 		WARN_ON_ONCE(1);
2774 	folio_start_writeback(folio);
2775 
2776 	count -= folio_nr_pages(folio);
2777 
2778 	xid = get_xid();
2779 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2780 
2781 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2782 	if (rc) {
2783 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2784 		goto err_xid;
2785 	}
2786 
2787 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2788 					   &wsize, credits);
2789 	if (rc != 0)
2790 		goto err_close;
2791 
2792 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2793 	if (!wdata) {
2794 		rc = -ENOMEM;
2795 		goto err_uncredit;
2796 	}
2797 
2798 	wdata->sync_mode = wbc->sync_mode;
2799 	wdata->offset = folio_pos(folio);
2800 	wdata->pid = cfile->pid;
2801 	wdata->credits = credits_on_stack;
2802 	wdata->cfile = cfile;
2803 	wdata->server = server;
2804 	cfile = NULL;
2805 
2806 	/* Find all consecutive lockable dirty pages that have contiguous
2807 	 * written regions, stopping when we find a page that is not
2808 	 * immediately lockable, is not dirty or is missing, or we reach the
2809 	 * end of the range.
2810 	 */
2811 	if (start < i_size) {
2812 		/* Trim the write to the EOF; the extra data is ignored.  Also
2813 		 * put an upper limit on the size of a single storedata op.
2814 		 */
2815 		max_len = wsize;
2816 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2817 		max_len = min_t(unsigned long long, max_len, i_size - start);
2818 
2819 		if (len < max_len) {
2820 			int max_pages = INT_MAX;
2821 
2822 #ifdef CONFIG_CIFS_SMB_DIRECT
2823 			if (server->smbd_conn)
2824 				max_pages = server->smbd_conn->max_frmr_depth;
2825 #endif
2826 			max_pages -= folio_nr_pages(folio);
2827 
2828 			if (max_pages > 0)
2829 				cifs_extend_writeback(mapping, xas, &count, start,
2830 						      max_pages, max_len, &len);
2831 		}
2832 	}
2833 	len = min_t(unsigned long long, len, i_size - start);
2834 
2835 	/* We now have a contiguous set of dirty pages, each with writeback
2836 	 * set; the first page is still locked at this point, but all the rest
2837 	 * have been unlocked.
2838 	 */
2839 	folio_unlock(folio);
2840 	wdata->bytes = len;
2841 
2842 	if (start < i_size) {
2843 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2844 				start, len);
2845 
2846 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2847 		if (rc)
2848 			goto err_wdata;
2849 
2850 		if (wdata->cfile->invalidHandle)
2851 			rc = -EAGAIN;
2852 		else
2853 			rc = wdata->server->ops->async_writev(wdata,
2854 							      cifs_writedata_release);
2855 		if (rc >= 0) {
2856 			kref_put(&wdata->refcount, cifs_writedata_release);
2857 			goto err_close;
2858 		}
2859 	} else {
2860 		/* The dirty region was entirely beyond the EOF. */
2861 		cifs_pages_written_back(inode, start, len);
2862 		rc = 0;
2863 	}
2864 
2865 err_wdata:
2866 	kref_put(&wdata->refcount, cifs_writedata_release);
2867 err_uncredit:
2868 	add_credits_and_wake_if(server, credits, 0);
2869 err_close:
2870 	if (cfile)
2871 		cifsFileInfo_put(cfile);
2872 err_xid:
2873 	free_xid(xid);
2874 	if (rc == 0) {
2875 		wbc->nr_to_write = count;
2876 		rc = len;
2877 	} else if (is_retryable_error(rc)) {
2878 		cifs_pages_write_redirty(inode, start, len);
2879 	} else {
2880 		cifs_pages_write_failed(inode, start, len);
2881 		mapping_set_error(mapping, rc);
2882 	}
2883 	/* Indication to update ctime and mtime as close is deferred */
2884 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2885 	return rc;
2886 }
2887 
2888 /*
2889  * Find the first dirty folio in the range and start writeback from it.
2890  */
2891 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2892 				     struct writeback_control *wbc,
2893 				     struct xa_state *xas,
2894 				     unsigned long long *_start,
2895 				     unsigned long long end)
2896 {
2897 	struct folio *folio;
2898 	unsigned long long start = *_start;
2899 	ssize_t ret;
2900 	int skips = 0;
2901 
2902 search_again:
2903 	/* Find the first dirty page. */
2904 	rcu_read_lock();
2905 
2906 	for (;;) {
2907 		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2908 		if (xas_retry(xas, folio) || xa_is_value(folio))
2909 			continue;
2910 		if (!folio)
2911 			break;
2912 
2913 		if (!folio_try_get_rcu(folio)) {
2914 			xas_reset(xas);
2915 			continue;
2916 		}
2917 
2918 		if (unlikely(folio != xas_reload(xas))) {
2919 			folio_put(folio);
2920 			xas_reset(xas);
2921 			continue;
2922 		}
2923 
2924 		xas_pause(xas);
2925 		break;
2926 	}
2927 	rcu_read_unlock();
2928 	if (!folio)
2929 		return 0;
2930 
2931 	start = folio_pos(folio); /* May regress with THPs */
2932 
2933 	/* At this point we hold neither the i_pages lock nor the page lock:
2934 	 * the page may be truncated or invalidated (changing page->mapping to
2935 	 * NULL), or even swizzled back from swapper_space to tmpfs file
2936 	 * mapping
2937 	 */
2938 lock_again:
2939 	if (wbc->sync_mode != WB_SYNC_NONE) {
2940 		ret = folio_lock_killable(folio);
2941 		if (ret < 0)
2942 			return ret;
2943 	} else {
2944 		if (!folio_trylock(folio))
2945 			goto search_again;
2946 	}
2947 
2948 	if (folio->mapping != mapping ||
2949 	    !folio_test_dirty(folio)) {
2950 		start += folio_size(folio);
2951 		folio_unlock(folio);
2952 		goto search_again;
2953 	}
2954 
2955 	if (folio_test_writeback(folio) ||
2956 	    folio_test_fscache(folio)) {
2957 		folio_unlock(folio);
2958 		if (wbc->sync_mode != WB_SYNC_NONE) {
2959 			folio_wait_writeback(folio);
2960 #ifdef CONFIG_CIFS_FSCACHE
2961 			folio_wait_fscache(folio);
2962 #endif
2963 			goto lock_again;
2964 		}
2965 
2966 		start += folio_size(folio);
2967 		if (wbc->sync_mode == WB_SYNC_NONE) {
2968 			if (skips >= 5 || need_resched()) {
2969 				ret = 0;
2970 				goto out;
2971 			}
2972 			skips++;
2973 		}
2974 		goto search_again;
2975 	}
2976 
2977 	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
2978 out:
2979 	if (ret > 0)
2980 		*_start = start + ret;
2981 	return ret;
2982 }
2983 
2984 /*
2985  * Write a region of pages back to the server
2986  */
2987 static int cifs_writepages_region(struct address_space *mapping,
2988 				  struct writeback_control *wbc,
2989 				  unsigned long long *_start,
2990 				  unsigned long long end)
2991 {
2992 	ssize_t ret;
2993 
2994 	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
2995 
2996 	do {
2997 		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
2998 		if (ret > 0 && wbc->nr_to_write > 0)
2999 			cond_resched();
3000 	} while (ret > 0 && wbc->nr_to_write > 0);
3001 
3002 	return ret > 0 ? 0 : ret;
3003 }
3004 
3005 /*
3006  * Write some of the pending data back to the server
3007  */
3008 static int cifs_writepages(struct address_space *mapping,
3009 			   struct writeback_control *wbc)
3010 {
3011 	loff_t start, end;
3012 	int ret;
3013 
3014 	/* We have to be careful as we can end up racing with setattr()
3015 	 * truncating the pagecache since the caller doesn't take a lock here
3016 	 * to prevent it.
3017 	 */
3018 
3019 	if (wbc->range_cyclic && mapping->writeback_index) {
3020 		start = mapping->writeback_index * PAGE_SIZE;
3021 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3022 		if (ret < 0)
3023 			goto out;
3024 
3025 		if (wbc->nr_to_write <= 0) {
3026 			mapping->writeback_index = start / PAGE_SIZE;
3027 			goto out;
3028 		}
3029 
3030 		start = 0;
3031 		end = mapping->writeback_index * PAGE_SIZE;
3032 		mapping->writeback_index = 0;
3033 		ret = cifs_writepages_region(mapping, wbc, &start, end);
3034 		if (ret == 0)
3035 			mapping->writeback_index = start / PAGE_SIZE;
3036 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3037 		start = 0;
3038 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3039 		if (wbc->nr_to_write > 0 && ret == 0)
3040 			mapping->writeback_index = start / PAGE_SIZE;
3041 	} else {
3042 		start = wbc->range_start;
3043 		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3044 	}
3045 
3046 out:
3047 	return ret;
3048 }
3049 
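/*
 * Write a single locked page to the server, retrying -EAGAIN for WB_SYNC_ALL
 * writeback and redirtying the page on other retryable errors.
 */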
3050 static int
3051 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3052 {
3053 	int rc;
3054 	unsigned int xid;
3055 
3056 	xid = get_xid();
3057 /* BB add check for wbc flags */
3058 	get_page(page);
3059 	if (!PageUptodate(page))
3060 		cifs_dbg(FYI, "ppw - page not up to date\n");
3061 
3062 	/*
3063 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3064 	 *
3065 	 * A writepage() implementation always needs to do either this,
3066 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3067 	 * the case of a failure.
3068 	 *
3069 	 * Just unlocking the page will cause the radix tree tag-bits
3070 	 * to fail to update with the state of the page correctly.
3071 	 */
3072 	set_page_writeback(page);
3073 retry_write:
3074 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3075 	if (is_retryable_error(rc)) {
3076 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3077 			goto retry_write;
3078 		redirty_page_for_writepage(wbc, page);
3079 	} else if (rc != 0) {
3080 		SetPageError(page);
3081 		mapping_set_error(page->mapping, rc);
3082 	} else {
3083 		SetPageUptodate(page);
3084 	}
3085 	end_page_writeback(page);
3086 	put_page(page);
3087 	free_xid(xid);
3088 	return rc;
3089 }
3090 
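/*
 * .write_end address_space op: if the folio is not up to date, push the
 * copied bytes straight to the server via cifs_write(); otherwise just mark
 * the folio dirty. Extends i_size (and the i_blocks estimate) when the write
 * goes past EOF.
 */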
3091 static int cifs_write_end(struct file *file, struct address_space *mapping,
3092 			loff_t pos, unsigned len, unsigned copied,
3093 			struct page *page, void *fsdata)
3094 {
3095 	int rc;
3096 	struct inode *inode = mapping->host;
3097 	struct cifsFileInfo *cfile = file->private_data;
3098 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3099 	struct folio *folio = page_folio(page);
3100 	__u32 pid;
3101 
3102 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3103 		pid = cfile->pid;
3104 	else
3105 		pid = current->tgid;
3106 
3107 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3108 		 page, pos, copied);
3109 
3110 	if (folio_test_checked(folio)) {
3111 		if (copied == len)
3112 			folio_mark_uptodate(folio);
3113 		folio_clear_checked(folio);
3114 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3115 		folio_mark_uptodate(folio);
3116 
3117 	if (!folio_test_uptodate(folio)) {
3118 		char *page_data;
3119 		unsigned offset = pos & (PAGE_SIZE - 1);
3120 		unsigned int xid;
3121 
3122 		xid = get_xid();
3123 		/* This is probably better than calling cifs_partialpagewrite()
3124 		   directly, since in this function the file handle is known,
3125 		   which we might as well leverage. */
3126 		/* BB check if anything else missing out of ppw
3127 		   such as updating last write time */
3128 		page_data = kmap(page);
3129 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3130 		/* if (rc < 0) should we set writebehind rc? */
3131 		kunmap(page);
3132 
3133 		free_xid(xid);
3134 	} else {
3135 		rc = copied;
3136 		pos += copied;
3137 		set_page_dirty(page);
3138 	}
3139 
3140 	if (rc > 0) {
3141 		spin_lock(&inode->i_lock);
3142 		if (pos > inode->i_size) {
3143 			loff_t additional_blocks = (512 - 1 + copied) >> 9;
3144 
3145 			i_size_write(inode, pos);
3146 			/*
3147 			 * Estimate new allocation size based on the amount written.
3148 			 * This will be updated from server on close (and on queryinfo)
3149 			 */
3150 			inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
3151 						inode->i_blocks + additional_blocks);
3152 		}
3153 		spin_unlock(&inode->i_lock);
3154 	}
3155 
3156 	unlock_page(page);
3157 	put_page(page);
3158 	/* Indication to update ctime and mtime as close is deferred */
3159 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3160 
3161 	return rc;
3162 }
3163 
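/*
 * fsync for strict cache mode: write back and wait for dirty pages,
 * invalidate the page cache if we no longer hold a read lease/oplock, then
 * ask the server to flush the file (unless server-side sync is disabled on
 * the mount).
 */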
3164 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3165 		      int datasync)
3166 {
3167 	unsigned int xid;
3168 	int rc = 0;
3169 	struct cifs_tcon *tcon;
3170 	struct TCP_Server_Info *server;
3171 	struct cifsFileInfo *smbfile = file->private_data;
3172 	struct inode *inode = file_inode(file);
3173 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3174 
3175 	rc = file_write_and_wait_range(file, start, end);
3176 	if (rc) {
3177 		trace_cifs_fsync_err(inode->i_ino, rc);
3178 		return rc;
3179 	}
3180 
3181 	xid = get_xid();
3182 
3183 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3184 		 file, datasync);
3185 
3186 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3187 		rc = cifs_zap_mapping(inode);
3188 		if (rc) {
3189 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3190 			rc = 0; /* don't care about it in fsync */
3191 		}
3192 	}
3193 
3194 	tcon = tlink_tcon(smbfile->tlink);
3195 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3196 		server = tcon->ses->server;
3197 		if (server->ops->flush == NULL) {
3198 			rc = -ENOSYS;
3199 			goto strict_fsync_exit;
3200 		}
3201 
3202 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3203 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3204 			if (smbfile) {
3205 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3206 				cifsFileInfo_put(smbfile);
3207 			} else
3208 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3209 		} else
3210 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3211 	}
3212 
3213 strict_fsync_exit:
3214 	free_xid(xid);
3215 	return rc;
3216 }
3217 
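/*
 * fsync for loose cache mode: write back and wait for dirty pages and ask
 * the server to flush the file, without invalidating the page cache.
 */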
3218 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3219 {
3220 	unsigned int xid;
3221 	int rc = 0;
3222 	struct cifs_tcon *tcon;
3223 	struct TCP_Server_Info *server;
3224 	struct cifsFileInfo *smbfile = file->private_data;
3225 	struct inode *inode = file_inode(file);
3226 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3227 
3228 	rc = file_write_and_wait_range(file, start, end);
3229 	if (rc) {
3230 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3231 		return rc;
3232 	}
3233 
3234 	xid = get_xid();
3235 
3236 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3237 		 file, datasync);
3238 
3239 	tcon = tlink_tcon(smbfile->tlink);
3240 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3241 		server = tcon->ses->server;
3242 		if (server->ops->flush == NULL) {
3243 			rc = -ENOSYS;
3244 			goto fsync_exit;
3245 		}
3246 
3247 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3248 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3249 			if (smbfile) {
3250 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3251 				cifsFileInfo_put(smbfile);
3252 			} else
3253 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3254 		} else
3255 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3256 	}
3257 
3258 fsync_exit:
3259 	free_xid(xid);
3260 	return rc;
3261 }
3262 
3263 /*
3264  * As file closes, flush all cached write data for this inode checking
3265  * for write behind errors.
3266  */
3267 int cifs_flush(struct file *file, fl_owner_t id)
3268 {
3269 	struct inode *inode = file_inode(file);
3270 	int rc = 0;
3271 
3272 	if (file->f_mode & FMODE_WRITE)
3273 		rc = filemap_write_and_wait(inode->i_mapping);
3274 
3275 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3276 	if (rc) {
3277 		/* get more nuanced writeback errors */
3278 		rc = filemap_check_wb_err(file->f_mapping, 0);
3279 		trace_cifs_flush_err(inode->i_ino, rc);
3280 	}
3281 	return rc;
3282 }
3283 
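/*
 * Release callback for write data used by the uncached/direct write path;
 * drops the reference on the owning AIO context before the common release.
 */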
3284 static void
3285 cifs_uncached_writedata_release(struct kref *refcount)
3286 {
3287 	struct cifs_writedata *wdata = container_of(refcount,
3288 					struct cifs_writedata, refcount);
3289 
3290 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3291 	cifs_writedata_release(refcount);
3292 }
3293 
3294 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3295 
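/*
 * Completion work for an uncached write: update the cached EOF (and i_size
 * if the remote size grew), signal the waiter and collect results for the
 * AIO context.
 */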
3296 static void
3297 cifs_uncached_writev_complete(struct work_struct *work)
3298 {
3299 	struct cifs_writedata *wdata = container_of(work,
3300 					struct cifs_writedata, work);
3301 	struct inode *inode = d_inode(wdata->cfile->dentry);
3302 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3303 
3304 	spin_lock(&inode->i_lock);
3305 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3306 	if (cifsi->netfs.remote_i_size > inode->i_size)
3307 		i_size_write(inode, cifsi->netfs.remote_i_size);
3308 	spin_unlock(&inode->i_lock);
3309 
3310 	complete(&wdata->done);
3311 	collect_uncached_write_data(wdata->ctx);
3312 	/* the below call can possibly free the last ref to aio ctx */
3313 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3314 }
3315 
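/*
 * Resend a whole wdata after a retryable failure: reopen the handle if
 * necessary and wait until the server grants enough credits to cover the
 * entire request before re-issuing the async write.
 */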
3316 static int
3317 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3318 	struct cifs_aio_ctx *ctx)
3319 {
3320 	unsigned int wsize;
3321 	struct cifs_credits credits;
3322 	int rc;
3323 	struct TCP_Server_Info *server = wdata->server;
3324 
3325 	do {
3326 		if (wdata->cfile->invalidHandle) {
3327 			rc = cifs_reopen_file(wdata->cfile, false);
3328 			if (rc == -EAGAIN)
3329 				continue;
3330 			else if (rc)
3331 				break;
3332 		}
3333 
3335 		/*
3336 		 * Wait for credits to resend this wdata.
3337 		 * Note: we are attempting to resend the whole wdata not in
3338 		 * segments
3339 		 */
3340 		do {
3341 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3342 						&wsize, &credits);
3343 			if (rc)
3344 				goto fail;
3345 
3346 			if (wsize < wdata->bytes) {
3347 				add_credits_and_wake_if(server, &credits, 0);
3348 				msleep(1000);
3349 			}
3350 		} while (wsize < wdata->bytes);
3351 		wdata->credits = credits;
3352 
3353 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3354 
3355 		if (!rc) {
3356 			if (wdata->cfile->invalidHandle)
3357 				rc = -EAGAIN;
3358 			else {
3359 				wdata->replay = true;
3360 #ifdef CONFIG_CIFS_SMB_DIRECT
3361 				if (wdata->mr) {
3362 					wdata->mr->need_invalidate = true;
3363 					smbd_deregister_mr(wdata->mr);
3364 					wdata->mr = NULL;
3365 				}
3366 #endif
3367 				rc = server->ops->async_writev(wdata,
3368 					cifs_uncached_writedata_release);
3369 			}
3370 		}
3371 
3372 		/* If the write was successfully sent, we are done */
3373 		if (!rc) {
3374 			list_add_tail(&wdata->list, wdata_list);
3375 			return 0;
3376 		}
3377 
3378 		/* Roll back credits and retry if needed */
3379 		add_credits_and_wake_if(server, &wdata->credits, 0);
3380 	} while (rc == -EAGAIN);
3381 
3382 fail:
3383 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3384 	return rc;
3385 }
3386 
3387 /*
3388  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3389  * size and maximum number of segments.
3390  */
3391 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3392 				     size_t max_segs, unsigned int *_nsegs)
3393 {
3394 	const struct bio_vec *bvecs = iter->bvec;
3395 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3396 	size_t len, span = 0, n = iter->count;
3397 	size_t skip = iter->iov_offset;
3398 
3399 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3400 		return 0;
3401 
3402 	while (n && ix < nbv && skip) {
3403 		len = bvecs[ix].bv_len;
3404 		if (skip < len)
3405 			break;
3406 		skip -= len;
3407 		n -= len;
3408 		ix++;
3409 	}
3410 
3411 	while (n && ix < nbv) {
3412 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3413 		span += len;
3414 		max_size -= len;
3415 		nsegs++;
3416 		ix++;
3417 		if (max_size == 0 || nsegs >= max_segs)
3418 			break;
3419 		skip = 0;
3420 		n -= len;
3421 	}
3422 
3423 	*_nsegs = nsegs;
3424 	return span;
3425 }
3426 
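/*
 * Carve @from into chunks bounded by the negotiated wsize (and, for
 * SMB-Direct, the maximum number of segments), allocate a wdata for each
 * chunk and dispatch it as an async write. Successfully submitted wdatas are
 * added to @wdata_list.
 */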
3427 static int
3428 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3429 		     struct cifsFileInfo *open_file,
3430 		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3431 		     struct cifs_aio_ctx *ctx)
3432 {
3433 	int rc = 0;
3434 	size_t cur_len, max_len;
3435 	struct cifs_writedata *wdata;
3436 	pid_t pid;
3437 	struct TCP_Server_Info *server;
3438 	unsigned int xid, max_segs = INT_MAX;
3439 
3440 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3441 		pid = open_file->pid;
3442 	else
3443 		pid = current->tgid;
3444 
3445 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3446 	xid = get_xid();
3447 
3448 #ifdef CONFIG_CIFS_SMB_DIRECT
3449 	if (server->smbd_conn)
3450 		max_segs = server->smbd_conn->max_frmr_depth;
3451 #endif
3452 
3453 	do {
3454 		struct cifs_credits credits_on_stack;
3455 		struct cifs_credits *credits = &credits_on_stack;
3456 		unsigned int wsize, nsegs = 0;
3457 
3458 		if (signal_pending(current)) {
3459 			rc = -EINTR;
3460 			break;
3461 		}
3462 
3463 		if (open_file->invalidHandle) {
3464 			rc = cifs_reopen_file(open_file, false);
3465 			if (rc == -EAGAIN)
3466 				continue;
3467 			else if (rc)
3468 				break;
3469 		}
3470 
3471 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3472 						   &wsize, credits);
3473 		if (rc)
3474 			break;
3475 
3476 		max_len = min_t(const size_t, len, wsize);
3477 		if (!max_len) {
3478 			rc = -EAGAIN;
3479 			add_credits_and_wake_if(server, credits, 0);
3480 			break;
3481 		}
3482 
3483 		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3484 		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3485 			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3486 		if (cur_len == 0) {
3487 			rc = -EIO;
3488 			add_credits_and_wake_if(server, credits, 0);
3489 			break;
3490 		}
3491 
3492 		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3493 		if (!wdata) {
3494 			rc = -ENOMEM;
3495 			add_credits_and_wake_if(server, credits, 0);
3496 			break;
3497 		}
3498 
3499 		wdata->sync_mode = WB_SYNC_ALL;
3500 		wdata->offset	= (__u64)fpos;
3501 		wdata->cfile	= cifsFileInfo_get(open_file);
3502 		wdata->server	= server;
3503 		wdata->pid	= pid;
3504 		wdata->bytes	= cur_len;
3505 		wdata->credits	= credits_on_stack;
3506 		wdata->iter	= *from;
3507 		wdata->ctx	= ctx;
3508 		kref_get(&ctx->refcount);
3509 
3510 		iov_iter_truncate(&wdata->iter, cur_len);
3511 
3512 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3513 
3514 		if (!rc) {
3515 			if (wdata->cfile->invalidHandle)
3516 				rc = -EAGAIN;
3517 			else
3518 				rc = server->ops->async_writev(wdata,
3519 					cifs_uncached_writedata_release);
3520 		}
3521 
3522 		if (rc) {
3523 			add_credits_and_wake_if(server, &wdata->credits, 0);
3524 			kref_put(&wdata->refcount,
3525 				 cifs_uncached_writedata_release);
3526 			if (rc == -EAGAIN)
3527 				continue;
3528 			break;
3529 		}
3530 
3531 		list_add_tail(&wdata->list, wdata_list);
3532 		iov_iter_advance(from, cur_len);
3533 		fpos += cur_len;
3534 		len -= cur_len;
3535 	} while (len > 0);
3536 
3537 	free_xid(xid);
3538 	return rc;
3539 }
3540 
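/*
 * Collect completions for uncached writes in order of increasing offset,
 * resending any chunk that failed with a retryable error. The total byte
 * count or the first error is recorded in the AIO context, which is then
 * completed.
 */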
3541 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3542 {
3543 	struct cifs_writedata *wdata, *tmp;
3544 	struct cifs_tcon *tcon;
3545 	struct cifs_sb_info *cifs_sb;
3546 	struct dentry *dentry = ctx->cfile->dentry;
3547 	ssize_t rc;
3548 
3549 	tcon = tlink_tcon(ctx->cfile->tlink);
3550 	cifs_sb = CIFS_SB(dentry->d_sb);
3551 
3552 	mutex_lock(&ctx->aio_mutex);
3553 
3554 	if (list_empty(&ctx->list)) {
3555 		mutex_unlock(&ctx->aio_mutex);
3556 		return;
3557 	}
3558 
3559 	rc = ctx->rc;
3560 	/*
3561 	 * Wait for and collect replies for any successful sends in order of
3562 	 * increasing offset. Once an error is hit, return without waiting
3563 	 * for any more replies.
3564 	 */
3565 restart_loop:
3566 	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3567 		if (!rc) {
3568 			if (!try_wait_for_completion(&wdata->done)) {
3569 				mutex_unlock(&ctx->aio_mutex);
3570 				return;
3571 			}
3572 
3573 			if (wdata->result)
3574 				rc = wdata->result;
3575 			else
3576 				ctx->total_len += wdata->bytes;
3577 
3578 			/* resend call if it's a retryable error */
3579 			if (rc == -EAGAIN) {
3580 				struct list_head tmp_list;
3581 				struct iov_iter tmp_from = ctx->iter;
3582 
3583 				INIT_LIST_HEAD(&tmp_list);
3584 				list_del_init(&wdata->list);
3585 
3586 				if (ctx->direct_io)
3587 					rc = cifs_resend_wdata(
3588 						wdata, &tmp_list, ctx);
3589 				else {
3590 					iov_iter_advance(&tmp_from,
3591 						 wdata->offset - ctx->pos);
3592 
3593 					rc = cifs_write_from_iter(wdata->offset,
3594 						wdata->bytes, &tmp_from,
3595 						ctx->cfile, cifs_sb, &tmp_list,
3596 						ctx);
3597 
3598 					kref_put(&wdata->refcount,
3599 						cifs_uncached_writedata_release);
3600 				}
3601 
3602 				list_splice(&tmp_list, &ctx->list);
3603 				goto restart_loop;
3604 			}
3605 		}
3606 		list_del_init(&wdata->list);
3607 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3608 	}
3609 
3610 	cifs_stats_bytes_written(tcon, ctx->total_len);
3611 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3612 
3613 	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3614 
3615 	mutex_unlock(&ctx->aio_mutex);
3616 
3617 	if (ctx->iocb && ctx->iocb->ki_complete)
3618 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3619 	else
3620 		complete(&ctx->done);
3621 }
3622 
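/*
 * Common body of the uncached and direct write paths: validate the write,
 * capture the source iterator in a form async workers can use (extracting or
 * duplicating user buffers), dispatch the writes, then either wait for
 * completion or return -EIOCBQUEUED for async iocbs.
 */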
3623 static ssize_t __cifs_writev(
3624 	struct kiocb *iocb, struct iov_iter *from, bool direct)
3625 {
3626 	struct file *file = iocb->ki_filp;
3627 	ssize_t total_written = 0;
3628 	struct cifsFileInfo *cfile;
3629 	struct cifs_tcon *tcon;
3630 	struct cifs_sb_info *cifs_sb;
3631 	struct cifs_aio_ctx *ctx;
3632 	int rc;
3633 
3634 	rc = generic_write_checks(iocb, from);
3635 	if (rc <= 0)
3636 		return rc;
3637 
3638 	cifs_sb = CIFS_FILE_SB(file);
3639 	cfile = file->private_data;
3640 	tcon = tlink_tcon(cfile->tlink);
3641 
3642 	if (!tcon->ses->server->ops->async_writev)
3643 		return -ENOSYS;
3644 
3645 	ctx = cifs_aio_ctx_alloc();
3646 	if (!ctx)
3647 		return -ENOMEM;
3648 
3649 	ctx->cfile = cifsFileInfo_get(cfile);
3650 
3651 	if (!is_sync_kiocb(iocb))
3652 		ctx->iocb = iocb;
3653 
3654 	ctx->pos = iocb->ki_pos;
3655 	ctx->direct_io = direct;
3656 	ctx->nr_pinned_pages = 0;
3657 
3658 	if (user_backed_iter(from)) {
3659 		/*
3660 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3661 		 * they contain references to the calling process's virtual
3662 		 * memory layout which won't be available in an async worker
3663 		 * thread.  This also takes a pin on every folio involved.
3664 		 */
3665 		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3666 					     &ctx->iter, 0);
3667 		if (rc < 0) {
3668 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3669 			return rc;
3670 		}
3671 
3672 		ctx->nr_pinned_pages = rc;
3673 		ctx->bv = (void *)ctx->iter.bvec;
3674 		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3675 	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3676 		   !is_sync_kiocb(iocb)) {
3677 		/*
3678 		 * If the op is asynchronous, we need to copy the list attached
3679 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3680 		 * will be pinned by the caller; in any case, we may or may not
3681 		 * be able to pin the pages, so we don't try.
3682 		 */
3683 		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3684 		if (!ctx->bv) {
3685 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3686 			return -ENOMEM;
3687 		}
3688 	} else {
3689 		/*
3690 		 * Otherwise, we just pass the iterator down as-is and rely on
3691 		 * the caller to make sure the pages referred to by the
3692 		 * iterator don't evaporate.
3693 		 */
3694 		ctx->iter = *from;
3695 	}
3696 
3697 	ctx->len = iov_iter_count(&ctx->iter);
3698 
3699 	/* grab the lock here because write response handlers can access ctx */
3700 	mutex_lock(&ctx->aio_mutex);
3701 
3702 	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3703 				  cfile, cifs_sb, &ctx->list, ctx);
3704 
3705 	/*
3706 	 * If at least one write was successfully sent, then discard any rc
3707 	 * value from the later writes. If those writes succeed, then we'll
3708 	 * end up returning whatever was written. If any of them fails, we'll
3709 	 * get a new rc value from that.
3710 	 */
3711 	if (!list_empty(&ctx->list))
3712 		rc = 0;
3713 
3714 	mutex_unlock(&ctx->aio_mutex);
3715 
3716 	if (rc) {
3717 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3718 		return rc;
3719 	}
3720 
3721 	if (!is_sync_kiocb(iocb)) {
3722 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3723 		return -EIOCBQUEUED;
3724 	}
3725 
3726 	rc = wait_for_completion_killable(&ctx->done);
3727 	if (rc) {
3728 		mutex_lock(&ctx->aio_mutex);
3729 		ctx->rc = rc = -EINTR;
3730 		total_written = ctx->total_len;
3731 		mutex_unlock(&ctx->aio_mutex);
3732 	} else {
3733 		rc = ctx->rc;
3734 		total_written = ctx->total_len;
3735 	}
3736 
3737 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3738 
3739 	if (unlikely(!total_written))
3740 		return rc;
3741 
3742 	iocb->ki_pos += total_written;
3743 	return total_written;
3744 }
3745 
3746 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3747 {
3748 	struct file *file = iocb->ki_filp;
3749 
3750 	cifs_revalidate_mapping(file->f_inode);
3751 	return __cifs_writev(iocb, from, true);
3752 }
3753 
3754 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3755 {
3756 	return __cifs_writev(iocb, from, false);
3757 }
3758 
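/*
 * Cached write with mandatory byte-range lock checking: take the inode lock
 * and lock_sem, refuse the write with -EACCES if it conflicts with a brlock,
 * and otherwise go through the generic page-cache write path.
 */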
3759 static ssize_t
3760 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3761 {
3762 	struct file *file = iocb->ki_filp;
3763 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3764 	struct inode *inode = file->f_mapping->host;
3765 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3766 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3767 	ssize_t rc;
3768 
3769 	inode_lock(inode);
3770 	/*
3771 	 * We need to hold the sem to be sure nobody modifies lock list
3772 	 * with a brlock that prevents writing.
3773 	 */
3774 	down_read(&cinode->lock_sem);
3775 
3776 	rc = generic_write_checks(iocb, from);
3777 	if (rc <= 0)
3778 		goto out;
3779 
3780 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3781 				     server->vals->exclusive_lock_type, 0,
3782 				     NULL, CIFS_WRITE_OP))
3783 		rc = __generic_file_write_iter(iocb, from);
3784 	else
3785 		rc = -EACCES;
3786 out:
3787 	up_read(&cinode->lock_sem);
3788 	inode_unlock(inode);
3789 
3790 	if (rc > 0)
3791 		rc = generic_write_sync(iocb, rc);
3792 	return rc;
3793 }
3794 
3795 ssize_t
3796 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3797 {
3798 	struct inode *inode = file_inode(iocb->ki_filp);
3799 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3800 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3801 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3802 						iocb->ki_filp->private_data;
3803 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3804 	ssize_t written;
3805 
3806 	written = cifs_get_writer(cinode);
3807 	if (written)
3808 		return written;
3809 
3810 	if (CIFS_CACHE_WRITE(cinode)) {
3811 		if (cap_unix(tcon->ses) &&
3812 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3813 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3814 			written = generic_file_write_iter(iocb, from);
3815 			goto out;
3816 		}
3817 		written = cifs_writev(iocb, from);
3818 		goto out;
3819 	}
3820 	/*
3821 	 * For non-oplocked files in strict cache mode we need to write the data
3822 	 * to the server exactly from pos to pos+len-1 rather than flush all
3823 	 * affected pages because that may cause an error with mandatory locks on
3824 	 * these pages but not on the region from pos to pos+len-1.
3825 	 */
3826 	written = cifs_user_writev(iocb, from);
3827 	if (CIFS_CACHE_READ(cinode)) {
3828 		/*
3829 		 * We have read level caching and we have just sent a write
3830 		 * request to the server thus making data in the cache stale.
3831 		 * Zap the cache and set oplock/lease level to NONE to avoid
3832 		 * reading stale data from the cache. All subsequent read
3833 		 * operations will read new data from the server.
3834 		 */
3835 		cifs_zap_mapping(inode);
3836 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3837 			 inode);
3838 		cinode->oplock = 0;
3839 	}
3840 out:
3841 	cifs_put_writer(cinode);
3842 	return written;
3843 }
3844 
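/*
 * Allocate a descriptor for one async read request, initialising its
 * refcount, list linkage, completion and work item.  It is freed through
 * cifs_readdata_release() when the last reference is dropped.
 */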
3845 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3846 {
3847 	struct cifs_readdata *rdata;
3848 
3849 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3850 	if (rdata) {
3851 		kref_init(&rdata->refcount);
3852 		INIT_LIST_HEAD(&rdata->list);
3853 		init_completion(&rdata->done);
3854 		INIT_WORK(&rdata->work, complete);
3855 	}
3856 
3857 	return rdata;
3858 }
3859 
3860 void
3861 cifs_readdata_release(struct kref *refcount)
3862 {
3863 	struct cifs_readdata *rdata = container_of(refcount,
3864 					struct cifs_readdata, refcount);
3865 
3866 	if (rdata->ctx)
3867 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3868 #ifdef CONFIG_CIFS_SMB_DIRECT
3869 	if (rdata->mr) {
3870 		smbd_deregister_mr(rdata->mr);
3871 		rdata->mr = NULL;
3872 	}
3873 #endif
3874 	if (rdata->cfile)
3875 		cifsFileInfo_put(rdata->cfile);
3876 
3877 	kfree(rdata);
3878 }
3879 
3880 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3881 
3882 static void
3883 cifs_uncached_readv_complete(struct work_struct *work)
3884 {
3885 	struct cifs_readdata *rdata = container_of(work,
3886 						struct cifs_readdata, work);
3887 
3888 	complete(&rdata->done);
3889 	collect_uncached_read_data(rdata->ctx);
3890 	/* the below call can possibly free the last ref to aio ctx */
3891 	kref_put(&rdata->refcount, cifs_readdata_release);
3892 }
3893 
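/*
 * Resend a read request that failed with -EAGAIN.  The rdata is resent as a
 * single piece: reopen the file handle if it has gone stale, wait until the
 * server grants enough credits to cover rdata->bytes, then reissue the async
 * read, retrying for as long as the send itself keeps returning -EAGAIN.
 */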
3894 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3895 			struct list_head *rdata_list,
3896 			struct cifs_aio_ctx *ctx)
3897 {
3898 	unsigned int rsize;
3899 	struct cifs_credits credits;
3900 	int rc;
3901 	struct TCP_Server_Info *server;
3902 
3903 	/* XXX: should we pick a new channel here? */
3904 	server = rdata->server;
3905 
3906 	do {
3907 		if (rdata->cfile->invalidHandle) {
3908 			rc = cifs_reopen_file(rdata->cfile, true);
3909 			if (rc == -EAGAIN)
3910 				continue;
3911 			else if (rc)
3912 				break;
3913 		}
3914 
3915 		/*
3916 		 * Wait for credits to resend this rdata.
3917 		 * Note: we are attempting to resend the whole rdata, not in
3918 		 * segments.
3919 		 */
3920 		do {
3921 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3922 						&rsize, &credits);
3923 
3924 			if (rc)
3925 				goto fail;
3926 
3927 			if (rsize < rdata->bytes) {
3928 				add_credits_and_wake_if(server, &credits, 0);
3929 				msleep(1000);
3930 			}
3931 		} while (rsize < rdata->bytes);
3932 		rdata->credits = credits;
3933 
3934 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3935 		if (!rc) {
3936 			if (rdata->cfile->invalidHandle)
3937 				rc = -EAGAIN;
3938 			else {
3939 #ifdef CONFIG_CIFS_SMB_DIRECT
3940 				if (rdata->mr) {
3941 					rdata->mr->need_invalidate = true;
3942 					smbd_deregister_mr(rdata->mr);
3943 					rdata->mr = NULL;
3944 				}
3945 #endif
3946 				rc = server->ops->async_readv(rdata);
3947 			}
3948 		}
3949 
3950 		/* If the read was successfully sent, we are done */
3951 		if (!rc) {
3952 			/* Add to aio pending list */
3953 			list_add_tail(&rdata->list, rdata_list);
3954 			return 0;
3955 		}
3956 
3957 		/* Roll back credits and retry if needed */
3958 		add_credits_and_wake_if(server, &rdata->credits, 0);
3959 	} while (rc == -EAGAIN);
3960 
3961 fail:
3962 	kref_put(&rdata->refcount, cifs_readdata_release);
3963 	return rc;
3964 }
3965 
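/*
 * Split an uncached read into chunks bounded by the negotiated rsize, the
 * credits the server grants and (for smbdirect) the maximum number of
 * scatter-gather segments, issuing one async read per chunk.  Each request
 * is queued on rdata_list so the results can be collected later.
 */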
3966 static int
3967 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
3968 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3969 		     struct cifs_aio_ctx *ctx)
3970 {
3971 	struct cifs_readdata *rdata;
3972 	unsigned int rsize, nsegs, max_segs = INT_MAX;
3973 	struct cifs_credits credits_on_stack;
3974 	struct cifs_credits *credits = &credits_on_stack;
3975 	size_t cur_len, max_len;
3976 	int rc;
3977 	pid_t pid;
3978 	struct TCP_Server_Info *server;
3979 
3980 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3981 
3982 #ifdef CONFIG_CIFS_SMB_DIRECT
3983 	if (server->smbd_conn)
3984 		max_segs = server->smbd_conn->max_frmr_depth;
3985 #endif
3986 
3987 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3988 		pid = open_file->pid;
3989 	else
3990 		pid = current->tgid;
3991 
3992 	do {
3993 		if (open_file->invalidHandle) {
3994 			rc = cifs_reopen_file(open_file, true);
3995 			if (rc == -EAGAIN)
3996 				continue;
3997 			else if (rc)
3998 				break;
3999 		}
4000 
4001 		if (cifs_sb->ctx->rsize == 0)
4002 			cifs_sb->ctx->rsize =
4003 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4004 							     cifs_sb->ctx);
4005 
4006 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4007 						   &rsize, credits);
4008 		if (rc)
4009 			break;
4010 
4011 		max_len = min_t(size_t, len, rsize);
4012 
4013 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4014 						 max_segs, &nsegs);
4015 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4016 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4017 		if (cur_len == 0) {
4018 			rc = -EIO;
4019 			add_credits_and_wake_if(server, credits, 0);
4020 			break;
4021 		}
4022 
4023 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4024 		if (!rdata) {
4025 			add_credits_and_wake_if(server, credits, 0);
4026 			rc = -ENOMEM;
4027 			break;
4028 		}
4029 
4030 		rdata->server	= server;
4031 		rdata->cfile	= cifsFileInfo_get(open_file);
4032 		rdata->offset	= fpos;
4033 		rdata->bytes	= cur_len;
4034 		rdata->pid	= pid;
4035 		rdata->credits	= credits_on_stack;
4036 		rdata->ctx	= ctx;
4037 		kref_get(&ctx->refcount);
4038 
4039 		rdata->iter	= ctx->iter;
4040 		iov_iter_truncate(&rdata->iter, cur_len);
4041 
4042 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4043 
4044 		if (!rc) {
4045 			if (rdata->cfile->invalidHandle)
4046 				rc = -EAGAIN;
4047 			else
4048 				rc = server->ops->async_readv(rdata);
4049 		}
4050 
4051 		if (rc) {
4052 			add_credits_and_wake_if(server, &rdata->credits, 0);
4053 			kref_put(&rdata->refcount, cifs_readdata_release);
4054 			if (rc == -EAGAIN)
4055 				continue;
4056 			break;
4057 		}
4058 
4059 		list_add_tail(&rdata->list, rdata_list);
4060 		iov_iter_advance(&ctx->iter, cur_len);
4061 		fpos += cur_len;
4062 		len -= cur_len;
4063 	} while (len > 0);
4064 
4065 	return rc;
4066 }
4067 
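/*
 * Collect replies to the outstanding uncached reads in order of increasing
 * offset.  Requests that completed with -EAGAIN are resent; a short read is
 * flagged as -ENODATA, which is masked again below so that the caller sees a
 * short byte count.  The result is stored in ctx->rc and the waiter (or the
 * aio completion callback) is notified.
 */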
4068 static void
4069 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4070 {
4071 	struct cifs_readdata *rdata, *tmp;
4072 	struct cifs_sb_info *cifs_sb;
4073 	int rc;
4074 
4075 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4076 
4077 	mutex_lock(&ctx->aio_mutex);
4078 
4079 	if (list_empty(&ctx->list)) {
4080 		mutex_unlock(&ctx->aio_mutex);
4081 		return;
4082 	}
4083 
4084 	rc = ctx->rc;
4085 	/* the loop below should proceed in the order of increasing offsets */
4086 again:
4087 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4088 		if (!rc) {
4089 			if (!try_wait_for_completion(&rdata->done)) {
4090 				mutex_unlock(&ctx->aio_mutex);
4091 				return;
4092 			}
4093 
4094 			if (rdata->result == -EAGAIN) {
4095 				/* resend call if it's a retryable error */
4096 				struct list_head tmp_list;
4097 				unsigned int got_bytes = rdata->got_bytes;
4098 
4099 				list_del_init(&rdata->list);
4100 				INIT_LIST_HEAD(&tmp_list);
4101 
4102 				if (ctx->direct_io) {
4103 					/*
4104 					 * Re-use rdata as this is a
4105 					 * direct I/O request
4106 					 */
4107 					rc = cifs_resend_rdata(
4108 						rdata,
4109 						&tmp_list, ctx);
4110 				} else {
4111 					rc = cifs_send_async_read(
4112 						rdata->offset + got_bytes,
4113 						rdata->bytes - got_bytes,
4114 						rdata->cfile, cifs_sb,
4115 						&tmp_list, ctx);
4116 
4117 					kref_put(&rdata->refcount,
4118 						cifs_readdata_release);
4119 				}
4120 
4121 				list_splice(&tmp_list, &ctx->list);
4122 
4123 				goto again;
4124 			} else if (rdata->result)
4125 				rc = rdata->result;
4126 
4127 			/* if there was a short read -- discard anything left */
4128 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4129 				rc = -ENODATA;
4130 
4131 			ctx->total_len += rdata->got_bytes;
4132 		}
4133 		list_del_init(&rdata->list);
4134 		kref_put(&rdata->refcount, cifs_readdata_release);
4135 	}
4136 
4137 	/* mask nodata case */
4138 	if (rc == -ENODATA)
4139 		rc = 0;
4140 
4141 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4142 
4143 	mutex_unlock(&ctx->aio_mutex);
4144 
4145 	if (ctx->iocb && ctx->iocb->ki_complete)
4146 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4147 	else
4148 		complete(&ctx->done);
4149 }
4150 
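/*
 * Common path for uncached and direct reads, mirroring __cifs_writev():
 * set up an aio context, make the destination iterator safe to use from a
 * worker thread, flush any cached pages for direct I/O, and dispatch the
 * async read requests.  Async callers get -EIOCBQUEUED; sync callers wait
 * for collect_uncached_read_data() to complete ctx->done.
 */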
4151 static ssize_t __cifs_readv(
4152 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4153 {
4154 	size_t len;
4155 	struct file *file = iocb->ki_filp;
4156 	struct cifs_sb_info *cifs_sb;
4157 	struct cifsFileInfo *cfile;
4158 	struct cifs_tcon *tcon;
4159 	ssize_t rc, total_read = 0;
4160 	loff_t offset = iocb->ki_pos;
4161 	struct cifs_aio_ctx *ctx;
4162 
4163 	len = iov_iter_count(to);
4164 	if (!len)
4165 		return 0;
4166 
4167 	cifs_sb = CIFS_FILE_SB(file);
4168 	cfile = file->private_data;
4169 	tcon = tlink_tcon(cfile->tlink);
4170 
4171 	if (!tcon->ses->server->ops->async_readv)
4172 		return -ENOSYS;
4173 
4174 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4175 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4176 
4177 	ctx = cifs_aio_ctx_alloc();
4178 	if (!ctx)
4179 		return -ENOMEM;
4180 
4181 	ctx->pos	= offset;
4182 	ctx->direct_io	= direct;
4183 	ctx->len	= len;
4184 	ctx->cfile	= cifsFileInfo_get(cfile);
4185 	ctx->nr_pinned_pages = 0;
4186 
4187 	if (!is_sync_kiocb(iocb))
4188 		ctx->iocb = iocb;
4189 
4190 	if (user_backed_iter(to)) {
4191 		/*
4192 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4193 		 * they contain references to the calling process's virtual
4194 		 * memory layout which won't be available in an async worker
4195 		 * thread.  This also takes a pin on every folio involved.
4196 		 */
4197 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4198 					     &ctx->iter, 0);
4199 		if (rc < 0) {
4200 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4201 			return rc;
4202 		}
4203 
4204 		ctx->nr_pinned_pages = rc;
4205 		ctx->bv = (void *)ctx->iter.bvec;
4206 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4207 		ctx->should_dirty = true;
4208 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4209 		   !is_sync_kiocb(iocb)) {
4210 		/*
4211 		 * If the op is asynchronous, we need to copy the list attached
4212 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4213 		 * will be retained by the caller; in any case, we may or may
4214 		 * not be able to pin the pages, so we don't try.
4215 		 */
4216 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4217 		if (!ctx->bv) {
4218 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4219 			return -ENOMEM;
4220 		}
4221 	} else {
4222 		/*
4223 		 * Otherwise, we just pass the iterator down as-is and rely on
4224 		 * the caller to make sure the pages referred to by the
4225 		 * iterator don't evaporate.
4226 		 */
4227 		ctx->iter = *to;
4228 	}
4229 
4230 	if (direct) {
4231 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4232 						  offset, offset + len - 1);
4233 		if (rc) {
4234 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4235 			return -EAGAIN;
4236 		}
4237 	}
4238 
4239 	/* grab the lock here because read response handlers can access ctx */
4240 	mutex_lock(&ctx->aio_mutex);
4241 
4242 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4243 
4244 	/* if at least one read request was successfully sent, then reset rc */
4245 	if (!list_empty(&ctx->list))
4246 		rc = 0;
4247 
4248 	mutex_unlock(&ctx->aio_mutex);
4249 
4250 	if (rc) {
4251 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4252 		return rc;
4253 	}
4254 
4255 	if (!is_sync_kiocb(iocb)) {
4256 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4257 		return -EIOCBQUEUED;
4258 	}
4259 
4260 	rc = wait_for_completion_killable(&ctx->done);
4261 	if (rc) {
4262 		mutex_lock(&ctx->aio_mutex);
4263 		ctx->rc = rc = -EINTR;
4264 		total_read = ctx->total_len;
4265 		mutex_unlock(&ctx->aio_mutex);
4266 	} else {
4267 		rc = ctx->rc;
4268 		total_read = ctx->total_len;
4269 	}
4270 
4271 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4272 
4273 	if (total_read) {
4274 		iocb->ki_pos += total_read;
4275 		return total_read;
4276 	}
4277 	return rc;
4278 }
4279 
4280 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4281 {
4282 	return __cifs_readv(iocb, to, true);
4283 }
4284 
4285 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4286 {
4287 	return __cifs_readv(iocb, to, false);
4288 }
4289 
4290 ssize_t
4291 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4292 {
4293 	struct inode *inode = file_inode(iocb->ki_filp);
4294 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4295 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4296 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4297 						iocb->ki_filp->private_data;
4298 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4299 	int rc = -EACCES;
4300 
4301 	/*
4302 	 * In strict cache mode we need to read from the server all the time
4303 	 * if we don't have a level II oplock because the server can delay mtime
4304 	 * change - so we can't make a decision about invalidating the inode.
4305 	 * We can also fail when reading pages if there are mandatory locks
4306 	 * on pages affected by this read but not on the region from pos to
4307 	 * pos+len-1.
4308 	 */
4309 	if (!CIFS_CACHE_READ(cinode))
4310 		return cifs_user_readv(iocb, to);
4311 
4312 	if (cap_unix(tcon->ses) &&
4313 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4314 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4315 		return generic_file_read_iter(iocb, to);
4316 
4317 	/*
4318 	 * We need to hold the sem to be sure nobody modifies lock list
4319 	 * with a brlock that prevents reading.
4320 	 */
4321 	down_read(&cinode->lock_sem);
4322 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4323 				     tcon->ses->server->vals->shared_lock_type,
4324 				     0, NULL, CIFS_READ_OP))
4325 		rc = generic_file_read_iter(iocb, to);
4326 	up_read(&cinode->lock_sem);
4327 	return rc;
4328 }
4329 
4330 static ssize_t
4331 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4332 {
4333 	int rc = -EACCES;
4334 	unsigned int bytes_read = 0;
4335 	unsigned int total_read;
4336 	unsigned int current_read_size;
4337 	unsigned int rsize;
4338 	struct cifs_sb_info *cifs_sb;
4339 	struct cifs_tcon *tcon;
4340 	struct TCP_Server_Info *server;
4341 	unsigned int xid;
4342 	char *cur_offset;
4343 	struct cifsFileInfo *open_file;
4344 	struct cifs_io_parms io_parms = {0};
4345 	int buf_type = CIFS_NO_BUFFER;
4346 	__u32 pid;
4347 
4348 	xid = get_xid();
4349 	cifs_sb = CIFS_FILE_SB(file);
4350 
4351 	/* FIXME: set up handlers for larger reads and/or convert to async */
4352 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4353 
4354 	if (file->private_data == NULL) {
4355 		rc = -EBADF;
4356 		free_xid(xid);
4357 		return rc;
4358 	}
4359 	open_file = file->private_data;
4360 	tcon = tlink_tcon(open_file->tlink);
4361 	server = cifs_pick_channel(tcon->ses);
4362 
4363 	if (!server->ops->sync_read) {
4364 		free_xid(xid);
4365 		return -ENOSYS;
4366 	}
4367 
4368 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4369 		pid = open_file->pid;
4370 	else
4371 		pid = current->tgid;
4372 
4373 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4374 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4375 
4376 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4377 	     total_read += bytes_read, cur_offset += bytes_read) {
4378 		do {
4379 			current_read_size = min_t(uint, read_size - total_read,
4380 						  rsize);
4381 			/*
4382 			 * For Windows ME and 9x we do not want to request more
4383 			 * than it negotiated, since it will otherwise refuse
4384 			 * the read.
4385 			 */
4386 			if (!(tcon->ses->capabilities &
4387 				tcon->ses->server->vals->cap_large_files)) {
4388 				current_read_size = min_t(uint,
4389 					current_read_size, CIFSMaxBufSize);
4390 			}
4391 			if (open_file->invalidHandle) {
4392 				rc = cifs_reopen_file(open_file, true);
4393 				if (rc != 0)
4394 					break;
4395 			}
4396 			io_parms.pid = pid;
4397 			io_parms.tcon = tcon;
4398 			io_parms.offset = *offset;
4399 			io_parms.length = current_read_size;
4400 			io_parms.server = server;
4401 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4402 						    &bytes_read, &cur_offset,
4403 						    &buf_type);
4404 		} while (rc == -EAGAIN);
4405 
4406 		if (rc || (bytes_read == 0)) {
4407 			if (total_read) {
4408 				break;
4409 			} else {
4410 				free_xid(xid);
4411 				return rc;
4412 			}
4413 		} else {
4414 			cifs_stats_bytes_read(tcon, total_read);
4415 			*offset += bytes_read;
4416 		}
4417 	}
4418 	free_xid(xid);
4419 	return total_read;
4420 }
4421 
4422 /*
4423  * If the page is mmap'ed into a process' page tables, then we need to make
4424  * sure that it doesn't change while being written back.
4425  */
4426 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4427 {
4428 	struct folio *folio = page_folio(vmf->page);
4429 
4430 	/* Wait for the folio to be written to the cache before we allow it to
4431 	 * be modified.  We then assume the entire folio will need writing back.
4432 	 */
4433 #ifdef CONFIG_CIFS_FSCACHE
4434 	if (folio_test_fscache(folio) &&
4435 	    folio_wait_fscache_killable(folio) < 0)
4436 		return VM_FAULT_RETRY;
4437 #endif
4438 
4439 	folio_wait_writeback(folio);
4440 
4441 	if (folio_lock_killable(folio) < 0)
4442 		return VM_FAULT_RETRY;
4443 	return VM_FAULT_LOCKED;
4444 }
4445 
4446 static const struct vm_operations_struct cifs_file_vm_ops = {
4447 	.fault = filemap_fault,
4448 	.map_pages = filemap_map_pages,
4449 	.page_mkwrite = cifs_page_mkwrite,
4450 };
4451 
4452 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4453 {
4454 	int xid, rc = 0;
4455 	struct inode *inode = file_inode(file);
4456 
4457 	xid = get_xid();
4458 
4459 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4460 		rc = cifs_zap_mapping(inode);
4461 	if (!rc)
4462 		rc = generic_file_mmap(file, vma);
4463 	if (!rc)
4464 		vma->vm_ops = &cifs_file_vm_ops;
4465 
4466 	free_xid(xid);
4467 	return rc;
4468 }
4469 
4470 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4471 {
4472 	int rc, xid;
4473 
4474 	xid = get_xid();
4475 
4476 	rc = cifs_revalidate_file(file);
4477 	if (rc)
4478 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4479 			 rc);
4480 	if (!rc)
4481 		rc = generic_file_mmap(file, vma);
4482 	if (!rc)
4483 		vma->vm_ops = &cifs_file_vm_ops;
4484 
4485 	free_xid(xid);
4486 	return rc;
4487 }
4488 
4489 /*
4490  * Unlock a bunch of folios in the pagecache.
4491  */
4492 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4493 {
4494 	struct folio *folio;
4495 	XA_STATE(xas, &mapping->i_pages, first);
4496 
4497 	rcu_read_lock();
4498 	xas_for_each(&xas, folio, last) {
4499 		folio_unlock(folio);
4500 	}
4501 	rcu_read_unlock();
4502 }
4503 
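/*
 * Completion work for a readahead request: if the read (fully or partially)
 * succeeded, let the local cache store the data, zero whatever part of the
 * buffer the server did not fill, mark the covered folios uptodate on
 * success, and unlock them.
 */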
4504 static void cifs_readahead_complete(struct work_struct *work)
4505 {
4506 	struct cifs_readdata *rdata = container_of(work,
4507 						   struct cifs_readdata, work);
4508 	struct folio *folio;
4509 	pgoff_t last;
4510 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4511 
4512 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4513 
4514 	if (good)
4515 		cifs_readahead_to_fscache(rdata->mapping->host,
4516 					  rdata->offset, rdata->bytes);
4517 
4518 	if (iov_iter_count(&rdata->iter) > 0)
4519 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4520 
4521 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4522 
4523 	rcu_read_lock();
4524 	xas_for_each(&xas, folio, last) {
4525 		if (good) {
4526 			flush_dcache_folio(folio);
4527 			folio_mark_uptodate(folio);
4528 		}
4529 		folio_unlock(folio);
4530 	}
4531 	rcu_read_unlock();
4532 
4533 	kref_put(&rdata->refcount, cifs_readdata_release);
4534 }
4535 
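/*
 * Readahead: walk the requested window, satisfying ranges that are already
 * present in the local cache from there, and chopping the remainder into
 * rsize-sized async read requests, each bounded by the credits granted by
 * the server.
 */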
4536 static void cifs_readahead(struct readahead_control *ractl)
4537 {
4538 	struct cifsFileInfo *open_file = ractl->file->private_data;
4539 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4540 	struct TCP_Server_Info *server;
4541 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4542 	unsigned int ra_pages;
4543 	pgoff_t next_cached = ULONG_MAX, ra_index;
4544 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4545 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4546 	bool check_cache = caching;
4547 	pid_t pid;
4548 	int rc = 0;
4549 
4550 	/* Note that readahead_count() lags behind our dequeuing of pages from
4551 	 * the ractl, so we have to keep track for ourselves.
4552 	 */
4553 	ra_pages = readahead_count(ractl);
4554 	ra_index = readahead_index(ractl);
4555 
4556 	xid = get_xid();
4557 
4558 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4559 		pid = open_file->pid;
4560 	else
4561 		pid = current->tgid;
4562 
4563 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4564 
4565 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4566 		 __func__, ractl->file, ractl->mapping, ra_pages);
4567 
4568 	/*
4569 	 * Chop the readahead request up into rsize-sized read requests.
4570 	 */
4571 	while ((nr_pages = ra_pages)) {
4572 		unsigned int i, rsize;
4573 		struct cifs_readdata *rdata;
4574 		struct cifs_credits credits_on_stack;
4575 		struct cifs_credits *credits = &credits_on_stack;
4576 		struct folio *folio;
4577 		pgoff_t fsize;
4578 
4579 		/*
4580 		 * Find out if we have anything cached in the range of
4581 		 * interest, and if so, where the next chunk of cached data is.
4582 		 */
4583 		if (caching) {
4584 			if (check_cache) {
4585 				rc = cifs_fscache_query_occupancy(
4586 					ractl->mapping->host, ra_index, nr_pages,
4587 					&next_cached, &cache_nr_pages);
4588 				if (rc < 0)
4589 					caching = false;
4590 				check_cache = false;
4591 			}
4592 
4593 			if (ra_index == next_cached) {
4594 				/*
4595 				 * TODO: Send a whole batch of pages to be read
4596 				 * by the cache.
4597 				 */
4598 				folio = readahead_folio(ractl);
4599 				fsize = folio_nr_pages(folio);
4600 				ra_pages -= fsize;
4601 				ra_index += fsize;
4602 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4603 							       &folio->page) < 0) {
4604 					/*
4605 					 * TODO: Deal with cache read failure
4606 					 * here, but for the moment, delegate
4607 					 * that to readpage.
4608 					 */
4609 					caching = false;
4610 				}
4611 				folio_unlock(folio);
4612 				next_cached += fsize;
4613 				cache_nr_pages -= fsize;
4614 				if (cache_nr_pages == 0)
4615 					check_cache = true;
4616 				continue;
4617 			}
4618 		}
4619 
4620 		if (open_file->invalidHandle) {
4621 			rc = cifs_reopen_file(open_file, true);
4622 			if (rc) {
4623 				if (rc == -EAGAIN)
4624 					continue;
4625 				break;
4626 			}
4627 		}
4628 
4629 		if (cifs_sb->ctx->rsize == 0)
4630 			cifs_sb->ctx->rsize =
4631 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4632 							     cifs_sb->ctx);
4633 
4634 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4635 						   &rsize, credits);
4636 		if (rc)
4637 			break;
4638 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4639 		if (next_cached != ULONG_MAX)
4640 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4641 
4642 		/*
4643 		 * Give up immediately if rsize is too small to read an entire
4644 		 * page. The VFS will fall back to readpage. We should never
4645 		 * reach this point however since we set ra_pages to 0 when the
4646 		 * rsize is smaller than a cache page.
4647 		 */
4648 		if (unlikely(!nr_pages)) {
4649 			add_credits_and_wake_if(server, credits, 0);
4650 			break;
4651 		}
4652 
4653 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4654 		if (!rdata) {
4655 			/* best to give up if we're out of mem */
4656 			add_credits_and_wake_if(server, credits, 0);
4657 			break;
4658 		}
4659 
4660 		rdata->offset	= ra_index * PAGE_SIZE;
4661 		rdata->bytes	= nr_pages * PAGE_SIZE;
4662 		rdata->cfile	= cifsFileInfo_get(open_file);
4663 		rdata->server	= server;
4664 		rdata->mapping	= ractl->mapping;
4665 		rdata->pid	= pid;
4666 		rdata->credits	= credits_on_stack;
4667 
4668 		for (i = 0; i < nr_pages; i++) {
4669 			if (!readahead_folio(ractl))
4670 				WARN_ON(1);
4671 		}
4672 		ra_pages -= nr_pages;
4673 		ra_index += nr_pages;
4674 
4675 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4676 				rdata->offset, rdata->bytes);
4677 
4678 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4679 		if (!rc) {
4680 			if (rdata->cfile->invalidHandle)
4681 				rc = -EAGAIN;
4682 			else
4683 				rc = server->ops->async_readv(rdata);
4684 		}
4685 
4686 		if (rc) {
4687 			add_credits_and_wake_if(server, &rdata->credits, 0);
4688 			cifs_unlock_folios(rdata->mapping,
4689 					   rdata->offset / PAGE_SIZE,
4690 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4691 			/* Fallback to the readpage in error/reconnect cases */
4692 			kref_put(&rdata->refcount, cifs_readdata_release);
4693 			break;
4694 		}
4695 
4696 		kref_put(&rdata->refcount, cifs_readdata_release);
4697 	}
4698 
4699 	free_xid(xid);
4700 }
4701 
4702 /*
4703  * cifs_readpage_worker must be called with the page pinned
4704  */
4705 static int cifs_readpage_worker(struct file *file, struct page *page,
4706 	loff_t *poffset)
4707 {
4708 	struct inode *inode = file_inode(file);
4709 	struct timespec64 atime, mtime;
4710 	char *read_data;
4711 	int rc;
4712 
4713 	/* Is the page cached? */
4714 	rc = cifs_readpage_from_fscache(inode, page);
4715 	if (rc == 0)
4716 		goto read_complete;
4717 
4718 	read_data = kmap(page);
4719 	/* for reads over a certain size could initiate async read ahead */
4720 
4721 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4722 
4723 	if (rc < 0)
4724 		goto io_error;
4725 	else
4726 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4727 
4728 	/* we do not want atime to be less than mtime, it broke some apps */
4729 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4730 	mtime = inode_get_mtime(inode);
4731 	if (timespec64_compare(&atime, &mtime) < 0)
4732 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4733 
4734 	if (PAGE_SIZE > rc)
4735 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4736 
4737 	flush_dcache_page(page);
4738 	SetPageUptodate(page);
4739 	rc = 0;
4740 
4741 io_error:
4742 	kunmap(page);
4743 
4744 read_complete:
4745 	unlock_page(page);
4746 	return rc;
4747 }
4748 
4749 static int cifs_read_folio(struct file *file, struct folio *folio)
4750 {
4751 	struct page *page = &folio->page;
4752 	loff_t offset = page_file_offset(page);
4753 	int rc = -EACCES;
4754 	unsigned int xid;
4755 
4756 	xid = get_xid();
4757 
4758 	if (file->private_data == NULL) {
4759 		rc = -EBADF;
4760 		free_xid(xid);
4761 		return rc;
4762 	}
4763 
4764 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4765 		 page, (int)offset, (int)offset);
4766 
4767 	rc = cifs_readpage_worker(file, page, &offset);
4768 
4769 	free_xid(xid);
4770 	return rc;
4771 }
4772 
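/* Return 1 if any open file handle on this inode was opened for writing. */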
4773 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4774 {
4775 	struct cifsFileInfo *open_file;
4776 
4777 	spin_lock(&cifs_inode->open_file_lock);
4778 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4779 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4780 			spin_unlock(&cifs_inode->open_file_lock);
4781 			return 1;
4782 		}
4783 	}
4784 	spin_unlock(&cifs_inode->open_file_lock);
4785 	return 0;
4786 }
4787 
4788 /* We do not want to update the file size from the server for inodes
4789    open for write - to avoid races with writepage extending
4790    the file.  In the future we could consider allowing
4791    refreshing of the inode only on increases in the file size,
4792    but this is tricky to do without racing with writebehind
4793    page caching in the current Linux kernel design. */
4794 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4795 			    bool from_readdir)
4796 {
4797 	if (!cifsInode)
4798 		return true;
4799 
4800 	if (is_inode_writable(cifsInode) ||
4801 		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4802 		/* This inode is open for write at least once */
4803 		struct cifs_sb_info *cifs_sb;
4804 
4805 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4806 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4807 			/* since there is no page cache to corrupt on directio
4808 			   we can change the size safely */
4809 			return true;
4810 		}
4811 
4812 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4813 			return true;
4814 
4815 		return false;
4816 	} else
4817 		return true;
4818 }
4819 
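/*
 * Prepare a page for a buffered write: grab the target page and, where the
 * existing contents may still be needed, read them in first so that
 * cifs_write_end() can copy the new data on top.
 */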
4820 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4821 			loff_t pos, unsigned len,
4822 			struct page **pagep, void **fsdata)
4823 {
4824 	int oncethru = 0;
4825 	pgoff_t index = pos >> PAGE_SHIFT;
4826 	loff_t offset = pos & (PAGE_SIZE - 1);
4827 	loff_t page_start = pos & PAGE_MASK;
4828 	loff_t i_size;
4829 	struct page *page;
4830 	int rc = 0;
4831 
4832 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4833 
4834 start:
4835 	page = grab_cache_page_write_begin(mapping, index);
4836 	if (!page) {
4837 		rc = -ENOMEM;
4838 		goto out;
4839 	}
4840 
4841 	if (PageUptodate(page))
4842 		goto out;
4843 
4844 	/*
4845 	 * If we write a full page it will be up to date, no need to read from
4846 	 * the server. If the write is short, we'll end up doing a sync write
4847 	 * instead.
4848 	 */
4849 	if (len == PAGE_SIZE)
4850 		goto out;
4851 
4852 	/*
4853 	 * optimize away the read when we have an oplock, and we're not
4854 	 * expecting to use any of the data we'd be reading in. That
4855 	 * is, when the page lies beyond the EOF, or straddles the EOF
4856 	 * and the write will cover all of the existing data.
4857 	 */
4858 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4859 		i_size = i_size_read(mapping->host);
4860 		if (page_start >= i_size ||
4861 		    (offset == 0 && (pos + len) >= i_size)) {
4862 			zero_user_segments(page, 0, offset,
4863 					   offset + len,
4864 					   PAGE_SIZE);
4865 			/*
4866 			 * PageChecked means that the parts of the page
4867 			 * to which we're not writing are considered up
4868 			 * to date. Once the data is copied to the
4869 			 * page, it can be set uptodate.
4870 			 */
4871 			SetPageChecked(page);
4872 			goto out;
4873 		}
4874 	}
4875 
4876 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4877 		/*
4878 		 * might as well read a page, it is fast enough. If we get
4879 		 * an error, we don't need to return it. cifs_write_end will
4880 		 * do a sync write instead since PG_uptodate isn't set.
4881 		 */
4882 		cifs_readpage_worker(file, page, &page_start);
4883 		put_page(page);
4884 		oncethru = 1;
4885 		goto start;
4886 	} else {
4887 		/* we could try using another file handle if there is one -
4888 		   but how would we lock it to prevent close of that handle
4889 		   racing with this read? In any case
4890 		   this will be written out by write_end so it is fine */
4891 	}
4892 out:
4893 	*pagep = page;
4894 	return rc;
4895 }
4896 
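/*
 * A folio can only be released if it carries no private writeback data and
 * any write to the local cache has completed (or we are allowed to wait for
 * it to complete).
 */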
4897 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4898 {
4899 	if (folio_test_private(folio))
4900 		return 0;
4901 	if (folio_test_fscache(folio)) {
4902 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4903 			return false;
4904 		folio_wait_fscache(folio);
4905 	}
4906 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4907 	return true;
4908 }
4909 
4910 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4911 				 size_t length)
4912 {
4913 	folio_wait_fscache(folio);
4914 }
4915 
4916 static int cifs_launder_folio(struct folio *folio)
4917 {
4918 	int rc = 0;
4919 	loff_t range_start = folio_pos(folio);
4920 	loff_t range_end = range_start + folio_size(folio);
4921 	struct writeback_control wbc = {
4922 		.sync_mode = WB_SYNC_ALL,
4923 		.nr_to_write = 0,
4924 		.range_start = range_start,
4925 		.range_end = range_end,
4926 	};
4927 
4928 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4929 
4930 	if (folio_clear_dirty_for_io(folio))
4931 		rc = cifs_writepage_locked(&folio->page, &wbc);
4932 
4933 	folio_wait_fscache(folio);
4934 	return rc;
4935 }
4936 
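/*
 * Worker run when the server breaks an oplock or lease: wait for in-flight
 * writers, downgrade the cached oplock state, flush (and, if required, zap)
 * the page cache, push any cached byte-range locks to the server, and
 * acknowledge the break unless the file has already been closed.
 */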
4937 void cifs_oplock_break(struct work_struct *work)
4938 {
4939 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4940 						  oplock_break);
4941 	struct inode *inode = d_inode(cfile->dentry);
4942 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4943 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4944 	struct cifs_tcon *tcon;
4945 	struct TCP_Server_Info *server;
4946 	struct tcon_link *tlink;
4947 	int rc = 0;
4948 	bool purge_cache = false, oplock_break_cancelled;
4949 	__u64 persistent_fid, volatile_fid;
4950 	__u16 net_fid;
4951 
4952 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4953 			TASK_UNINTERRUPTIBLE);
4954 
4955 	tlink = cifs_sb_tlink(cifs_sb);
4956 	if (IS_ERR(tlink))
4957 		goto out;
4958 	tcon = tlink_tcon(tlink);
4959 	server = tcon->ses->server;
4960 
4961 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4962 				      cfile->oplock_epoch, &purge_cache);
4963 
4964 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4965 						cifs_has_mand_locks(cinode)) {
4966 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4967 			 inode);
4968 		cinode->oplock = 0;
4969 	}
4970 
4971 	if (inode && S_ISREG(inode->i_mode)) {
4972 		if (CIFS_CACHE_READ(cinode))
4973 			break_lease(inode, O_RDONLY);
4974 		else
4975 			break_lease(inode, O_WRONLY);
4976 		rc = filemap_fdatawrite(inode->i_mapping);
4977 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4978 			rc = filemap_fdatawait(inode->i_mapping);
4979 			mapping_set_error(inode->i_mapping, rc);
4980 			cifs_zap_mapping(inode);
4981 		}
4982 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4983 		if (CIFS_CACHE_WRITE(cinode))
4984 			goto oplock_break_ack;
4985 	}
4986 
4987 	rc = cifs_push_locks(cfile);
4988 	if (rc)
4989 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4990 
4991 oplock_break_ack:
4992 	/*
4993 	 * When an oplock break is received and there are no active file
4994 	 * handles, only cached ones, schedule the deferred close immediately
4995 	 * so that a new open will not use the cached handle.
4996 	 */
4997 
4998 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
4999 		cifs_close_deferred_file(cinode);
5000 
5001 	persistent_fid = cfile->fid.persistent_fid;
5002 	volatile_fid = cfile->fid.volatile_fid;
5003 	net_fid = cfile->fid.netfid;
5004 	oplock_break_cancelled = cfile->oplock_break_cancelled;
5005 
5006 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5007 	/*
5008 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5009 	 * an acknowledgment to be sent when the file has already been closed.
5010 	 */
5011 	spin_lock(&cinode->open_file_lock);
5012 	/* check list empty since can race with kill_sb calling tree disconnect */
5013 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5014 		spin_unlock(&cinode->open_file_lock);
5015 		rc = server->ops->oplock_response(tcon, persistent_fid,
5016 						  volatile_fid, net_fid, cinode);
5017 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5018 	} else
5019 		spin_unlock(&cinode->open_file_lock);
5020 
5021 	cifs_put_tlink(tlink);
5022 out:
5023 	cifs_done_oplock_break(cinode);
5024 }
5025 
5026 /*
5027  * The presence of cifs_direct_io() in the address space ops vector
5028  * allows open() with the O_DIRECT flag, which would otherwise have failed.
5029  *
5030  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5031  * so this method should never be called.
5032  *
5033  * Direct IO is not yet supported in the cached mode.
5034  */
5035 static ssize_t
5036 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5037 {
5038 	/*
5039 	 * FIXME
5040 	 * Eventually need to support direct IO for non-forcedirectio mounts
5041 	 */
5042 	return -EINVAL;
5043 }
5044 
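/*
 * Activate a swap file on a CIFS share: swapping requires the address space
 * to provide ->swap_rw and the backing file to be fully allocated (no holes).
 */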
5045 static int cifs_swap_activate(struct swap_info_struct *sis,
5046 			      struct file *swap_file, sector_t *span)
5047 {
5048 	struct cifsFileInfo *cfile = swap_file->private_data;
5049 	struct inode *inode = swap_file->f_mapping->host;
5050 	unsigned long blocks;
5051 	long long isize;
5052 
5053 	cifs_dbg(FYI, "swap activate\n");
5054 
5055 	if (!swap_file->f_mapping->a_ops->swap_rw)
5056 		/* Cannot support swap */
5057 		return -EINVAL;
5058 
5059 	spin_lock(&inode->i_lock);
5060 	blocks = inode->i_blocks;
5061 	isize = inode->i_size;
5062 	spin_unlock(&inode->i_lock);
5063 	if (blocks*512 < isize) {
5064 		pr_warn("swap activate: swapfile has holes\n");
5065 		return -EINVAL;
5066 	}
5067 	*span = sis->pages;
5068 
5069 	pr_warn_once("Swap support over SMB3 is experimental\n");
5070 
5071 	/*
5072 	 * TODO: consider adding ACL (or documenting how) to prevent other
5073 	 * users (on this or other systems) from reading it
5074 	 */
5075 
5076 
5077 	/* TODO: add sk_set_memalloc(inet) or similar */
5078 
5079 	if (cfile)
5080 		cfile->swapfile = true;
5081 	/*
5082 	 * TODO: Since the file is already open, we can't open with DENY_ALL here
5083 	 * but we could add a call to grab a byte-range lock to prevent others
5084 	 * from reading or writing the file
5085 	 */
5086 
5087 	sis->flags |= SWP_FS_OPS;
5088 	return add_swap_extent(sis, 0, sis->max, 0);
5089 }
5090 
5091 static void cifs_swap_deactivate(struct file *file)
5092 {
5093 	struct cifsFileInfo *cfile = file->private_data;
5094 
5095 	cifs_dbg(FYI, "swap deactivate\n");
5096 
5097 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5098 
5099 	if (cfile)
5100 		cfile->swapfile = false;
5101 
5102 	/* do we need to unpin (or unlock) the file? */
5103 }
5104 
5105 const struct address_space_operations cifs_addr_ops = {
5106 	.read_folio = cifs_read_folio,
5107 	.readahead = cifs_readahead,
5108 	.writepages = cifs_writepages,
5109 	.write_begin = cifs_write_begin,
5110 	.write_end = cifs_write_end,
5111 	.dirty_folio = netfs_dirty_folio,
5112 	.release_folio = cifs_release_folio,
5113 	.direct_IO = cifs_direct_io,
5114 	.invalidate_folio = cifs_invalidate_folio,
5115 	.launder_folio = cifs_launder_folio,
5116 	.migrate_folio = filemap_migrate_folio,
5117 	/*
5118 	 * TODO: investigate whether adding an is_dirty_writeback helper
5119 	 * would be useful here
5120 	 */
5121 	.swap_activate = cifs_swap_activate,
5122 	.swap_deactivate = cifs_swap_deactivate,
5123 };
5124 
5125 /*
5126  * cifs_readahead requires the server to support a buffer large enough to
5127  * contain the header plus one complete page of data.  Otherwise, we need
5128  * to leave cifs_readahead out of the address space operations.
5129  */
5130 const struct address_space_operations cifs_addr_ops_smallbuf = {
5131 	.read_folio = cifs_read_folio,
5132 	.writepages = cifs_writepages,
5133 	.write_begin = cifs_write_begin,
5134 	.write_end = cifs_write_end,
5135 	.dirty_folio = netfs_dirty_folio,
5136 	.release_folio = cifs_release_folio,
5137 	.invalidate_folio = cifs_invalidate_folio,
5138 	.launder_folio = cifs_launder_folio,
5139 	.migrate_folio = filemap_migrate_folio,
5140 };
5141