xref: /linux/fs/smb/client/file.c (revision 02091cbe9cc4f18167208eec1d6de636cc731817)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio_index(folio), end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio_index(folio), end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio_index(folio), end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark as invalid, all open files on tree connections since they
167  * were closed when session to server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->status != TID_NEED_RECON) {
179 		spin_unlock(&tcon->tc_lock);
180 		return;
181 	}
182 	tcon->status = TID_IN_FILES_INVALIDATE;
183 	spin_unlock(&tcon->tc_lock);
184 
185 	/* list all files open on tree connection and mark them invalid */
186 	spin_lock(&tcon->open_file_lock);
187 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
188 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
189 		open_file->invalidHandle = true;
190 		open_file->oplock_break_cancelled = true;
191 	}
192 	spin_unlock(&tcon->open_file_lock);
193 
194 	invalidate_all_cached_dirs(tcon);
195 	spin_lock(&tcon->tc_lock);
196 	if (tcon->status == TID_IN_FILES_INVALIDATE)
197 		tcon->status = TID_NEED_TCON;
198 	spin_unlock(&tcon->tc_lock);
199 
200 	/*
201 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
202 	 * to this tcon.
203 	 */
204 }
205 
206 static inline int cifs_convert_flags(unsigned int flags)
207 {
208 	if ((flags & O_ACCMODE) == O_RDONLY)
209 		return GENERIC_READ;
210 	else if ((flags & O_ACCMODE) == O_WRONLY)
211 		return GENERIC_WRITE;
212 	else if ((flags & O_ACCMODE) == O_RDWR) {
213 		/* GENERIC_ALL is too much permission to request
214 		   can cause unnecessary access denied on create */
215 		/* return GENERIC_ALL; */
216 		return (GENERIC_READ | GENERIC_WRITE);
217 	}
218 
219 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
220 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
221 		FILE_READ_DATA);
222 }
223 
224 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
225 static u32 cifs_posix_convert_flags(unsigned int flags)
226 {
227 	u32 posix_flags = 0;
228 
229 	if ((flags & O_ACCMODE) == O_RDONLY)
230 		posix_flags = SMB_O_RDONLY;
231 	else if ((flags & O_ACCMODE) == O_WRONLY)
232 		posix_flags = SMB_O_WRONLY;
233 	else if ((flags & O_ACCMODE) == O_RDWR)
234 		posix_flags = SMB_O_RDWR;
235 
236 	if (flags & O_CREAT) {
237 		posix_flags |= SMB_O_CREAT;
238 		if (flags & O_EXCL)
239 			posix_flags |= SMB_O_EXCL;
240 	} else if (flags & O_EXCL)
241 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
242 			 current->comm, current->tgid);
243 
244 	if (flags & O_TRUNC)
245 		posix_flags |= SMB_O_TRUNC;
246 	/* be safe and imply O_SYNC for O_DSYNC */
247 	if (flags & O_DSYNC)
248 		posix_flags |= SMB_O_SYNC;
249 	if (flags & O_DIRECTORY)
250 		posix_flags |= SMB_O_DIRECTORY;
251 	if (flags & O_NOFOLLOW)
252 		posix_flags |= SMB_O_NOFOLLOW;
253 	if (flags & O_DIRECT)
254 		posix_flags |= SMB_O_DIRECT;
255 
256 	return posix_flags;
257 }
258 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
259 
260 static inline int cifs_get_disposition(unsigned int flags)
261 {
262 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
263 		return FILE_CREATE;
264 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
265 		return FILE_OVERWRITE_IF;
266 	else if ((flags & O_CREAT) == O_CREAT)
267 		return FILE_OPEN_IF;
268 	else if ((flags & O_TRUNC) == O_TRUNC)
269 		return FILE_OVERWRITE;
270 	else
271 		return FILE_OPEN;
272 }
273 
274 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
275 int cifs_posix_open(const char *full_path, struct inode **pinode,
276 			struct super_block *sb, int mode, unsigned int f_flags,
277 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
278 {
279 	int rc;
280 	FILE_UNIX_BASIC_INFO *presp_data;
281 	__u32 posix_flags = 0;
282 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
283 	struct cifs_fattr fattr;
284 	struct tcon_link *tlink;
285 	struct cifs_tcon *tcon;
286 
287 	cifs_dbg(FYI, "posix open %s\n", full_path);
288 
289 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
290 	if (presp_data == NULL)
291 		return -ENOMEM;
292 
293 	tlink = cifs_sb_tlink(cifs_sb);
294 	if (IS_ERR(tlink)) {
295 		rc = PTR_ERR(tlink);
296 		goto posix_open_ret;
297 	}
298 
299 	tcon = tlink_tcon(tlink);
300 	mode &= ~current_umask();
301 
302 	posix_flags = cifs_posix_convert_flags(f_flags);
303 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
304 			     poplock, full_path, cifs_sb->local_nls,
305 			     cifs_remap(cifs_sb));
306 	cifs_put_tlink(tlink);
307 
308 	if (rc)
309 		goto posix_open_ret;
310 
311 	if (presp_data->Type == cpu_to_le32(-1))
312 		goto posix_open_ret; /* open ok, caller does qpathinfo */
313 
314 	if (!pinode)
315 		goto posix_open_ret; /* caller does not need info */
316 
317 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
318 
319 	/* get new inode and set it up */
320 	if (*pinode == NULL) {
321 		cifs_fill_uniqueid(sb, &fattr);
322 		*pinode = cifs_iget(sb, &fattr);
323 		if (!*pinode) {
324 			rc = -ENOMEM;
325 			goto posix_open_ret;
326 		}
327 	} else {
328 		cifs_revalidate_mapping(*pinode);
329 		rc = cifs_fattr_to_inode(*pinode, &fattr);
330 	}
331 
332 posix_open_ret:
333 	kfree(presp_data);
334 	return rc;
335 }
336 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
337 
338 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
339 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
340 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
341 {
342 	int rc;
343 	int desired_access;
344 	int disposition;
345 	int create_options = CREATE_NOT_DIR;
346 	struct TCP_Server_Info *server = tcon->ses->server;
347 	struct cifs_open_parms oparms;
348 
349 	if (!server->ops->open)
350 		return -ENOSYS;
351 
352 	desired_access = cifs_convert_flags(f_flags);
353 
354 /*********************************************************************
355  *  open flag mapping table:
356  *
357  *	POSIX Flag            CIFS Disposition
358  *	----------            ----------------
359  *	O_CREAT               FILE_OPEN_IF
360  *	O_CREAT | O_EXCL      FILE_CREATE
361  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
362  *	O_TRUNC               FILE_OVERWRITE
363  *	none of the above     FILE_OPEN
364  *
365  *	Note that there is not a direct match between disposition
366  *	FILE_SUPERSEDE (ie create whether or not file exists although
367  *	O_CREAT | O_TRUNC is similar but truncates the existing
368  *	file rather than creating a new file as FILE_SUPERSEDE does
369  *	(which uses the attributes / metadata passed in on open call)
370  *?
371  *?  O_SYNC is a reasonable match to CIFS writethrough flag
372  *?  and the read write flags match reasonably.  O_LARGEFILE
373  *?  is irrelevant because largefile support is always used
374  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
375  *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
376  *********************************************************************/
377 
378 	disposition = cifs_get_disposition(f_flags);
379 
380 	/* BB pass O_SYNC flag through on file attributes .. BB */
381 
382 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
383 	if (f_flags & O_SYNC)
384 		create_options |= CREATE_WRITE_THROUGH;
385 
386 	if (f_flags & O_DIRECT)
387 		create_options |= CREATE_NO_BUFFER;
388 
389 	oparms = (struct cifs_open_parms) {
390 		.tcon = tcon,
391 		.cifs_sb = cifs_sb,
392 		.desired_access = desired_access,
393 		.create_options = cifs_create_options(cifs_sb, create_options),
394 		.disposition = disposition,
395 		.path = full_path,
396 		.fid = fid,
397 	};
398 
399 	rc = server->ops->open(xid, &oparms, oplock, buf);
400 	if (rc)
401 		return rc;
402 
403 	/* TODO: Add support for calling posix query info but with passing in fid */
404 	if (tcon->unix_ext)
405 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
406 					      xid);
407 	else
408 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
409 					 xid, fid);
410 
411 	if (rc) {
412 		server->ops->close(xid, tcon, fid);
413 		if (rc == -ESTALE)
414 			rc = -EOPENSTALE;
415 	}
416 
417 	return rc;
418 }
419 
420 static bool
421 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
422 {
423 	struct cifs_fid_locks *cur;
424 	bool has_locks = false;
425 
426 	down_read(&cinode->lock_sem);
427 	list_for_each_entry(cur, &cinode->llist, llist) {
428 		if (!list_empty(&cur->locks)) {
429 			has_locks = true;
430 			break;
431 		}
432 	}
433 	up_read(&cinode->lock_sem);
434 	return has_locks;
435 }
436 
/*
 * Take @sem for writing by polling down_write_trylock(), sleeping 10ms
 * between attempts, instead of blocking in down_write().
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
443 
444 static void cifsFileInfo_put_work(struct work_struct *work);
445 
446 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
447 				       struct tcon_link *tlink, __u32 oplock,
448 				       const char *symlink_target)
449 {
450 	struct dentry *dentry = file_dentry(file);
451 	struct inode *inode = d_inode(dentry);
452 	struct cifsInodeInfo *cinode = CIFS_I(inode);
453 	struct cifsFileInfo *cfile;
454 	struct cifs_fid_locks *fdlocks;
455 	struct cifs_tcon *tcon = tlink_tcon(tlink);
456 	struct TCP_Server_Info *server = tcon->ses->server;
457 
458 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
459 	if (cfile == NULL)
460 		return cfile;
461 
462 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
463 	if (!fdlocks) {
464 		kfree(cfile);
465 		return NULL;
466 	}
467 
468 	if (symlink_target) {
469 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
470 		if (!cfile->symlink_target) {
471 			kfree(fdlocks);
472 			kfree(cfile);
473 			return NULL;
474 		}
475 	}
476 
477 	INIT_LIST_HEAD(&fdlocks->locks);
478 	fdlocks->cfile = cfile;
479 	cfile->llist = fdlocks;
480 
481 	cfile->count = 1;
482 	cfile->pid = current->tgid;
483 	cfile->uid = current_fsuid();
484 	cfile->dentry = dget(dentry);
485 	cfile->f_flags = file->f_flags;
486 	cfile->invalidHandle = false;
487 	cfile->deferred_close_scheduled = false;
488 	cfile->tlink = cifs_get_tlink(tlink);
489 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
490 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
491 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
492 	mutex_init(&cfile->fh_mutex);
493 	spin_lock_init(&cfile->file_info_lock);
494 
495 	cifs_sb_active(inode->i_sb);
496 
497 	/*
498 	 * If the server returned a read oplock and we have mandatory brlocks,
499 	 * set oplock level to None.
500 	 */
501 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
502 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
503 		oplock = 0;
504 	}
505 
506 	cifs_down_write(&cinode->lock_sem);
507 	list_add(&fdlocks->llist, &cinode->llist);
508 	up_write(&cinode->lock_sem);
509 
510 	spin_lock(&tcon->open_file_lock);
511 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
512 		oplock = fid->pending_open->oplock;
513 	list_del(&fid->pending_open->olist);
514 
515 	fid->purge_cache = false;
516 	server->ops->set_fid(cfile, fid, oplock);
517 
518 	list_add(&cfile->tlist, &tcon->openFileList);
519 	atomic_inc(&tcon->num_local_opens);
520 
521 	/* if readable file instance put first in list*/
522 	spin_lock(&cinode->open_file_lock);
523 	if (file->f_mode & FMODE_READ)
524 		list_add(&cfile->flist, &cinode->openFileList);
525 	else
526 		list_add_tail(&cfile->flist, &cinode->openFileList);
527 	spin_unlock(&cinode->open_file_lock);
528 	spin_unlock(&tcon->open_file_lock);
529 
530 	if (fid->purge_cache)
531 		cifs_zap_mapping(inode);
532 
533 	file->private_data = cfile;
534 	return cfile;
535 }
536 
537 struct cifsFileInfo *
538 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
539 {
540 	spin_lock(&cifs_file->file_info_lock);
541 	cifsFileInfo_get_locked(cifs_file);
542 	spin_unlock(&cifs_file->file_info_lock);
543 	return cifs_file;
544 }
545 
546 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
547 {
548 	struct inode *inode = d_inode(cifs_file->dentry);
549 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
550 	struct cifsLockInfo *li, *tmp;
551 	struct super_block *sb = inode->i_sb;
552 
553 	/*
554 	 * Delete any outstanding lock records. We'll lose them when the file
555 	 * is closed anyway.
556 	 */
557 	cifs_down_write(&cifsi->lock_sem);
558 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
559 		list_del(&li->llist);
560 		cifs_del_lock_waiters(li);
561 		kfree(li);
562 	}
563 	list_del(&cifs_file->llist->llist);
564 	kfree(cifs_file->llist);
565 	up_write(&cifsi->lock_sem);
566 
567 	cifs_put_tlink(cifs_file->tlink);
568 	dput(cifs_file->dentry);
569 	cifs_sb_deactive(sb);
570 	kfree(cifs_file->symlink_target);
571 	kfree(cifs_file);
572 }
573 
574 static void cifsFileInfo_put_work(struct work_struct *work)
575 {
576 	struct cifsFileInfo *cifs_file = container_of(work,
577 			struct cifsFileInfo, put);
578 
579 	cifsFileInfo_put_final(cifs_file);
580 }
581 
582 /**
583  * cifsFileInfo_put - release a reference of file priv data
584  *
585  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
586  *
587  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
588  */
589 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
590 {
591 	_cifsFileInfo_put(cifs_file, true, true);
592 }
593 
594 /**
595  * _cifsFileInfo_put - release a reference of file priv data
596  *
597  * This may involve closing the filehandle @cifs_file out on the
598  * server. Must be called without holding tcon->open_file_lock,
599  * cinode->open_file_lock and cifs_file->file_info_lock.
600  *
601  * If @wait_for_oplock_handler is true and we are releasing the last
602  * reference, wait for any running oplock break handler of the file
603  * and cancel any pending one.
604  *
605  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
606  * @wait_oplock_handler: must be false if called from oplock_break_handler
607  * @offload:	not offloaded on close and oplock breaks
608  *
609  */
610 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
611 		       bool wait_oplock_handler, bool offload)
612 {
613 	struct inode *inode = d_inode(cifs_file->dentry);
614 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
615 	struct TCP_Server_Info *server = tcon->ses->server;
616 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
617 	struct super_block *sb = inode->i_sb;
618 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
619 	struct cifs_fid fid = {};
620 	struct cifs_pending_open open;
621 	bool oplock_break_cancelled;
622 
623 	spin_lock(&tcon->open_file_lock);
624 	spin_lock(&cifsi->open_file_lock);
625 	spin_lock(&cifs_file->file_info_lock);
626 	if (--cifs_file->count > 0) {
627 		spin_unlock(&cifs_file->file_info_lock);
628 		spin_unlock(&cifsi->open_file_lock);
629 		spin_unlock(&tcon->open_file_lock);
630 		return;
631 	}
632 	spin_unlock(&cifs_file->file_info_lock);
633 
634 	if (server->ops->get_lease_key)
635 		server->ops->get_lease_key(inode, &fid);
636 
637 	/* store open in pending opens to make sure we don't miss lease break */
638 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
639 
640 	/* remove it from the lists */
641 	list_del(&cifs_file->flist);
642 	list_del(&cifs_file->tlist);
643 	atomic_dec(&tcon->num_local_opens);
644 
645 	if (list_empty(&cifsi->openFileList)) {
646 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
647 			 d_inode(cifs_file->dentry));
648 		/*
649 		 * In strict cache mode we need invalidate mapping on the last
650 		 * close  because it may cause a error when we open this file
651 		 * again and get at least level II oplock.
652 		 */
653 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
654 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
655 		cifs_set_oplock_level(cifsi, 0);
656 	}
657 
658 	spin_unlock(&cifsi->open_file_lock);
659 	spin_unlock(&tcon->open_file_lock);
660 
661 	oplock_break_cancelled = wait_oplock_handler ?
662 		cancel_work_sync(&cifs_file->oplock_break) : false;
663 
664 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
665 		struct TCP_Server_Info *server = tcon->ses->server;
666 		unsigned int xid;
667 
668 		xid = get_xid();
669 		if (server->ops->close_getattr)
670 			server->ops->close_getattr(xid, tcon, cifs_file);
671 		else if (server->ops->close)
672 			server->ops->close(xid, tcon, &cifs_file->fid);
673 		_free_xid(xid);
674 	}
675 
676 	if (oplock_break_cancelled)
677 		cifs_done_oplock_break(cifsi);
678 
679 	cifs_del_pending_open(&open);
680 
681 	if (offload)
682 		queue_work(fileinfo_put_wq, &cifs_file->put);
683 	else
684 		cifsFileInfo_put_final(cifs_file);
685 }
686 
687 int cifs_open(struct inode *inode, struct file *file)
688 
689 {
690 	int rc = -EACCES;
691 	unsigned int xid;
692 	__u32 oplock;
693 	struct cifs_sb_info *cifs_sb;
694 	struct TCP_Server_Info *server;
695 	struct cifs_tcon *tcon;
696 	struct tcon_link *tlink;
697 	struct cifsFileInfo *cfile = NULL;
698 	void *page;
699 	const char *full_path;
700 	bool posix_open_ok = false;
701 	struct cifs_fid fid = {};
702 	struct cifs_pending_open open;
703 	struct cifs_open_info_data data = {};
704 
705 	xid = get_xid();
706 
707 	cifs_sb = CIFS_SB(inode->i_sb);
708 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
709 		free_xid(xid);
710 		return -EIO;
711 	}
712 
713 	tlink = cifs_sb_tlink(cifs_sb);
714 	if (IS_ERR(tlink)) {
715 		free_xid(xid);
716 		return PTR_ERR(tlink);
717 	}
718 	tcon = tlink_tcon(tlink);
719 	server = tcon->ses->server;
720 
721 	page = alloc_dentry_path();
722 	full_path = build_path_from_dentry(file_dentry(file), page);
723 	if (IS_ERR(full_path)) {
724 		rc = PTR_ERR(full_path);
725 		goto out;
726 	}
727 
728 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
729 		 inode, file->f_flags, full_path);
730 
731 	if (file->f_flags & O_DIRECT &&
732 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
733 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
734 			file->f_op = &cifs_file_direct_nobrl_ops;
735 		else
736 			file->f_op = &cifs_file_direct_ops;
737 	}
738 
739 	/* Get the cached handle as SMB2 close is deferred */
740 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
741 	if (rc == 0) {
742 		if (file->f_flags == cfile->f_flags) {
743 			file->private_data = cfile;
744 			spin_lock(&CIFS_I(inode)->deferred_lock);
745 			cifs_del_deferred_close(cfile);
746 			spin_unlock(&CIFS_I(inode)->deferred_lock);
747 			goto use_cache;
748 		} else {
749 			_cifsFileInfo_put(cfile, true, false);
750 		}
751 	}
752 
753 	if (server->oplocks)
754 		oplock = REQ_OPLOCK;
755 	else
756 		oplock = 0;
757 
758 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
759 	if (!tcon->broken_posix_open && tcon->unix_ext &&
760 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
761 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
762 		/* can not refresh inode info since size could be stale */
763 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
764 				cifs_sb->ctx->file_mode /* ignored */,
765 				file->f_flags, &oplock, &fid.netfid, xid);
766 		if (rc == 0) {
767 			cifs_dbg(FYI, "posix open succeeded\n");
768 			posix_open_ok = true;
769 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
770 			if (tcon->ses->serverNOS)
771 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
772 					 tcon->ses->ip_addr,
773 					 tcon->ses->serverNOS);
774 			tcon->broken_posix_open = true;
775 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
776 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
777 			goto out;
778 		/*
779 		 * Else fallthrough to retry open the old way on network i/o
780 		 * or DFS errors.
781 		 */
782 	}
783 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
784 
785 	if (server->ops->get_lease_key)
786 		server->ops->get_lease_key(inode, &fid);
787 
788 	cifs_add_pending_open(&fid, tlink, &open);
789 
790 	if (!posix_open_ok) {
791 		if (server->ops->get_lease_key)
792 			server->ops->get_lease_key(inode, &fid);
793 
794 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
795 				  xid, &data);
796 		if (rc) {
797 			cifs_del_pending_open(&open);
798 			goto out;
799 		}
800 	}
801 
802 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
803 	if (cfile == NULL) {
804 		if (server->ops->close)
805 			server->ops->close(xid, tcon, &fid);
806 		cifs_del_pending_open(&open);
807 		rc = -ENOMEM;
808 		goto out;
809 	}
810 
811 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
812 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
813 		/*
814 		 * Time to set mode which we can not set earlier due to
815 		 * problems creating new read-only files.
816 		 */
817 		struct cifs_unix_set_info_args args = {
818 			.mode	= inode->i_mode,
819 			.uid	= INVALID_UID, /* no change */
820 			.gid	= INVALID_GID, /* no change */
821 			.ctime	= NO_CHANGE_64,
822 			.atime	= NO_CHANGE_64,
823 			.mtime	= NO_CHANGE_64,
824 			.device	= 0,
825 		};
826 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
827 				       cfile->pid);
828 	}
829 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
830 
831 use_cache:
832 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
833 			   file->f_mode & FMODE_WRITE);
834 	if (file->f_flags & O_DIRECT &&
835 	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
836 	     file->f_flags & O_APPEND))
837 		cifs_invalidate_cache(file_inode(file),
838 				      FSCACHE_INVAL_DIO_WRITE);
839 
840 out:
841 	free_dentry_path(page);
842 	free_xid(xid);
843 	cifs_put_tlink(tlink);
844 	cifs_free_open_info(&data);
845 	return rc;
846 }
847 
848 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
849 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
850 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
851 
852 /*
853  * Try to reacquire byte range locks that were released when session
854  * to server was lost.
855  */
856 static int
857 cifs_relock_file(struct cifsFileInfo *cfile)
858 {
859 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
860 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
861 	int rc = 0;
862 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
863 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
864 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
865 
866 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
867 	if (cinode->can_cache_brlcks) {
868 		/* can cache locks - no need to relock */
869 		up_read(&cinode->lock_sem);
870 		return rc;
871 	}
872 
873 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
874 	if (cap_unix(tcon->ses) &&
875 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
876 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
877 		rc = cifs_push_posix_locks(cfile);
878 	else
879 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
880 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
881 
882 	up_read(&cinode->lock_sem);
883 	return rc;
884 }
885 
886 static int
887 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
888 {
889 	int rc = -EACCES;
890 	unsigned int xid;
891 	__u32 oplock;
892 	struct cifs_sb_info *cifs_sb;
893 	struct cifs_tcon *tcon;
894 	struct TCP_Server_Info *server;
895 	struct cifsInodeInfo *cinode;
896 	struct inode *inode;
897 	void *page;
898 	const char *full_path;
899 	int desired_access;
900 	int disposition = FILE_OPEN;
901 	int create_options = CREATE_NOT_DIR;
902 	struct cifs_open_parms oparms;
903 
904 	xid = get_xid();
905 	mutex_lock(&cfile->fh_mutex);
906 	if (!cfile->invalidHandle) {
907 		mutex_unlock(&cfile->fh_mutex);
908 		free_xid(xid);
909 		return 0;
910 	}
911 
912 	inode = d_inode(cfile->dentry);
913 	cifs_sb = CIFS_SB(inode->i_sb);
914 	tcon = tlink_tcon(cfile->tlink);
915 	server = tcon->ses->server;
916 
917 	/*
918 	 * Can not grab rename sem here because various ops, including those
919 	 * that already have the rename sem can end up causing writepage to get
920 	 * called and if the server was down that means we end up here, and we
921 	 * can never tell if the caller already has the rename_sem.
922 	 */
923 	page = alloc_dentry_path();
924 	full_path = build_path_from_dentry(cfile->dentry, page);
925 	if (IS_ERR(full_path)) {
926 		mutex_unlock(&cfile->fh_mutex);
927 		free_dentry_path(page);
928 		free_xid(xid);
929 		return PTR_ERR(full_path);
930 	}
931 
932 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
933 		 inode, cfile->f_flags, full_path);
934 
935 	if (tcon->ses->server->oplocks)
936 		oplock = REQ_OPLOCK;
937 	else
938 		oplock = 0;
939 
940 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
941 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
942 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
943 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
944 		/*
945 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
946 		 * original open. Must mask them off for a reopen.
947 		 */
948 		unsigned int oflags = cfile->f_flags &
949 						~(O_CREAT | O_EXCL | O_TRUNC);
950 
951 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
952 				     cifs_sb->ctx->file_mode /* ignored */,
953 				     oflags, &oplock, &cfile->fid.netfid, xid);
954 		if (rc == 0) {
955 			cifs_dbg(FYI, "posix reopen succeeded\n");
956 			oparms.reconnect = true;
957 			goto reopen_success;
958 		}
959 		/*
960 		 * fallthrough to retry open the old way on errors, especially
961 		 * in the reconnect path it is important to retry hard
962 		 */
963 	}
964 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
965 
966 	desired_access = cifs_convert_flags(cfile->f_flags);
967 
968 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
969 	if (cfile->f_flags & O_SYNC)
970 		create_options |= CREATE_WRITE_THROUGH;
971 
972 	if (cfile->f_flags & O_DIRECT)
973 		create_options |= CREATE_NO_BUFFER;
974 
975 	if (server->ops->get_lease_key)
976 		server->ops->get_lease_key(inode, &cfile->fid);
977 
978 	oparms = (struct cifs_open_parms) {
979 		.tcon = tcon,
980 		.cifs_sb = cifs_sb,
981 		.desired_access = desired_access,
982 		.create_options = cifs_create_options(cifs_sb, create_options),
983 		.disposition = disposition,
984 		.path = full_path,
985 		.fid = &cfile->fid,
986 		.reconnect = true,
987 	};
988 
989 	/*
990 	 * Can not refresh inode by passing in file_info buf to be returned by
991 	 * ops->open and then calling get_inode_info with returned buf since
992 	 * file might have write behind data that needs to be flushed and server
993 	 * version of file size can be stale. If we knew for sure that inode was
994 	 * not dirty locally we could do this.
995 	 */
996 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
997 	if (rc == -ENOENT && oparms.reconnect == false) {
998 		/* durable handle timeout is expired - open the file again */
999 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1000 		/* indicate that we need to relock the file */
1001 		oparms.reconnect = true;
1002 	}
1003 
1004 	if (rc) {
1005 		mutex_unlock(&cfile->fh_mutex);
1006 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1007 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1008 		goto reopen_error_exit;
1009 	}
1010 
1011 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1012 reopen_success:
1013 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1014 	cfile->invalidHandle = false;
1015 	mutex_unlock(&cfile->fh_mutex);
1016 	cinode = CIFS_I(inode);
1017 
1018 	if (can_flush) {
1019 		rc = filemap_write_and_wait(inode->i_mapping);
1020 		if (!is_interrupt_error(rc))
1021 			mapping_set_error(inode->i_mapping, rc);
1022 
1023 		if (tcon->posix_extensions)
1024 			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
1025 		else if (tcon->unix_ext)
1026 			rc = cifs_get_inode_info_unix(&inode, full_path,
1027 						      inode->i_sb, xid);
1028 		else
1029 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1030 						 inode->i_sb, xid, NULL);
1031 	}
1032 	/*
1033 	 * Else we are writing out data to server already and could deadlock if
1034 	 * we tried to flush data, and since we do not know if we have data that
1035 	 * would invalidate the current end of file on the server we can not go
1036 	 * to the server to get the new inode info.
1037 	 */
1038 
1039 	/*
1040 	 * If the server returned a read oplock and we have mandatory brlocks,
1041 	 * set oplock level to None.
1042 	 */
1043 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1044 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1045 		oplock = 0;
1046 	}
1047 
1048 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1049 	if (oparms.reconnect)
1050 		cifs_relock_file(cfile);
1051 
1052 reopen_error_exit:
1053 	free_dentry_path(page);
1054 	free_xid(xid);
1055 	return rc;
1056 }
1057 
1058 void smb2_deferred_work_close(struct work_struct *work)
1059 {
1060 	struct cifsFileInfo *cfile = container_of(work,
1061 			struct cifsFileInfo, deferred.work);
1062 
1063 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1064 	cifs_del_deferred_close(cfile);
1065 	cfile->deferred_close_scheduled = false;
1066 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1067 	_cifsFileInfo_put(cfile, true, false);
1068 }
1069 
/*
 * ->release handler for regular files.
 *
 * Detaches the cifsFileInfo from @file and then either puts it immediately
 * or defers the close by queueing cfile->deferred on deferredclose_wq.
 * The close is deferred only when all of the following hold:
 *   - the mount has a non-zero closetimeo,
 *   - the inode's oplock is exactly CIFS_CACHE_RHW_FLG and lease_granted
 *     is set,
 *   - CIFS_INO_CLOSE_ON_LOCK is not set on the inode,
 *   - the deferred-close tracking structure could be allocated.
 *
 * Always returns 0.
 */
int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		/*
		 * Allocated before the eligibility test; a failed allocation
		 * simply makes the close non-deferred. Freed in the else
		 * branch below when the close is not deferred.
		 */
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
		    && cinode->lease_granted &&
		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
		    dclose) {
			/* attributes were modified: refresh ctime and mtime now */
			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
				inode->i_mtime = inode_set_ctime_current(inode);
			}
			spin_lock(&cinode->deferred_lock);
			/* dclose is handed to cifs_add_deferred_close() here */
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_close_scheduled &&
			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there is no pending work, mod_delayed_work queues new work.
				 * So, Increase the ref count to avoid use-after-free.
				 */
				if (!mod_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->closetimeo))
					cifsFileInfo_get(cfile);
			} else {
				/*
				 * Deferred close for files: queue the work and
				 * return without putting cfile here; the work
				 * handler (smb2_deferred_work_close) does the put.
				 */
				queue_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->closetimeo);
				cfile->deferred_close_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			/* not eligible for deferral: put now, drop unused dclose */
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}
1120 
1121 void
1122 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1123 {
1124 	struct cifsFileInfo *open_file, *tmp;
1125 	struct list_head tmp_list;
1126 
1127 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1128 		return;
1129 
1130 	tcon->need_reopen_files = false;
1131 
1132 	cifs_dbg(FYI, "Reopen persistent handles\n");
1133 	INIT_LIST_HEAD(&tmp_list);
1134 
1135 	/* list all files open on tree connection, reopen resilient handles  */
1136 	spin_lock(&tcon->open_file_lock);
1137 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1138 		if (!open_file->invalidHandle)
1139 			continue;
1140 		cifsFileInfo_get(open_file);
1141 		list_add_tail(&open_file->rlist, &tmp_list);
1142 	}
1143 	spin_unlock(&tcon->open_file_lock);
1144 
1145 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1146 		if (cifs_reopen_file(open_file, false /* do not flush */))
1147 			tcon->need_reopen_files = true;
1148 		list_del_init(&open_file->rlist);
1149 		cifsFileInfo_put(open_file);
1150 	}
1151 }
1152 
1153 int cifs_closedir(struct inode *inode, struct file *file)
1154 {
1155 	int rc = 0;
1156 	unsigned int xid;
1157 	struct cifsFileInfo *cfile = file->private_data;
1158 	struct cifs_tcon *tcon;
1159 	struct TCP_Server_Info *server;
1160 	char *buf;
1161 
1162 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1163 
1164 	if (cfile == NULL)
1165 		return rc;
1166 
1167 	xid = get_xid();
1168 	tcon = tlink_tcon(cfile->tlink);
1169 	server = tcon->ses->server;
1170 
1171 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1172 	spin_lock(&cfile->file_info_lock);
1173 	if (server->ops->dir_needs_close(cfile)) {
1174 		cfile->invalidHandle = true;
1175 		spin_unlock(&cfile->file_info_lock);
1176 		if (server->ops->close_dir)
1177 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1178 		else
1179 			rc = -ENOSYS;
1180 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1181 		/* not much we can do if it fails anyway, ignore rc */
1182 		rc = 0;
1183 	} else
1184 		spin_unlock(&cfile->file_info_lock);
1185 
1186 	buf = cfile->srch_inf.ntwrk_buf_start;
1187 	if (buf) {
1188 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1189 		cfile->srch_inf.ntwrk_buf_start = NULL;
1190 		if (cfile->srch_inf.smallBuf)
1191 			cifs_small_buf_release(buf);
1192 		else
1193 			cifs_buf_release(buf);
1194 	}
1195 
1196 	cifs_put_tlink(cfile->tlink);
1197 	kfree(file->private_data);
1198 	file->private_data = NULL;
1199 	/* BB can we lock the filestruct while this is going on? */
1200 	free_xid(xid);
1201 	return rc;
1202 }
1203 
1204 static struct cifsLockInfo *
1205 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1206 {
1207 	struct cifsLockInfo *lock =
1208 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1209 	if (!lock)
1210 		return lock;
1211 	lock->offset = offset;
1212 	lock->length = length;
1213 	lock->type = type;
1214 	lock->pid = current->tgid;
1215 	lock->flags = flags;
1216 	INIT_LIST_HEAD(&lock->blist);
1217 	init_waitqueue_head(&lock->block_q);
1218 	return lock;
1219 }
1220 
1221 void
1222 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1223 {
1224 	struct cifsLockInfo *li, *tmp;
1225 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1226 		list_del_init(&li->blist);
1227 		wake_up(&li->block_q);
1228 	}
1229 }
1230 
/*
 * Kind of operation a lock-conflict check is being made for; passed as the
 * rw_check argument of the conflict helpers below.
 */
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2
1234 
/*
 * Look through the locks recorded in @fdlocks for one that overlaps the
 * range [@offset, @offset + @length) and conflicts with the request.
 *
 * @rw_check : 0 - no op, 1 - read, 2 - write
 *
 * Returns true on the first conflicting lock, storing it in *@conf_lock
 * when @conf_lock is non-NULL; returns false if nothing conflicts.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* ranges do not overlap - cannot conflict */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		/*
		 * Read/write check against a lock held by the same thread
		 * group through the same fid: only a shared lock combined
		 * with a write is left to be treated as a conflict.
		 */
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/*
		 * A shared request does not conflict with our own lock on
		 * the same fid, nor with a lock of exactly the same type.
		 */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		/* two OFD locks taken through the same fid do not conflict */
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
1271 
1272 bool
1273 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1274 			__u8 type, __u16 flags,
1275 			struct cifsLockInfo **conf_lock, int rw_check)
1276 {
1277 	bool rc = false;
1278 	struct cifs_fid_locks *cur;
1279 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1280 
1281 	list_for_each_entry(cur, &cinode->llist, llist) {
1282 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1283 						 flags, cfile, conf_lock,
1284 						 rw_check);
1285 		if (rc)
1286 			break;
1287 	}
1288 
1289 	return rc;
1290 }
1291 
1292 /*
1293  * Check if there is another lock that prevents us to set the lock (mandatory
1294  * style). If such a lock exists, update the flock structure with its
1295  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1296  * or leave it the same if we can't. Returns 0 if we don't need to request to
1297  * the server or 1 otherwise.
1298  */
1299 static int
1300 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1301 	       __u8 type, struct file_lock *flock)
1302 {
1303 	int rc = 0;
1304 	struct cifsLockInfo *conf_lock;
1305 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1306 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1307 	bool exist;
1308 
1309 	down_read(&cinode->lock_sem);
1310 
1311 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1312 					flock->fl_flags, &conf_lock,
1313 					CIFS_LOCK_OP);
1314 	if (exist) {
1315 		flock->fl_start = conf_lock->offset;
1316 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1317 		flock->fl_pid = conf_lock->pid;
1318 		if (conf_lock->type & server->vals->shared_lock_type)
1319 			flock->fl_type = F_RDLCK;
1320 		else
1321 			flock->fl_type = F_WRLCK;
1322 	} else if (!cinode->can_cache_brlcks)
1323 		rc = 1;
1324 	else
1325 		flock->fl_type = F_UNLCK;
1326 
1327 	up_read(&cinode->lock_sem);
1328 	return rc;
1329 }
1330 
1331 static void
1332 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1333 {
1334 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1335 	cifs_down_write(&cinode->lock_sem);
1336 	list_add_tail(&lock->llist, &cfile->llist->locks);
1337 	up_write(&cinode->lock_sem);
1338 }
1339 
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false;
 * 4) the non-zero result of wait_event_interruptible(), if wait is true and
 *    the wait for the conflicting lock did not complete normally.
 *
 * The lock is only linked into the file's list in case 1; in every other
 * case the caller still owns @lock.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	/* no conflict and brlocks are cached: record the lock locally */
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		/*
		 * Queue ourselves on the conflicting lock and sleep until
		 * our blist node is self-linked again, which is the state
		 * cifs_del_lock_waiters() leaves it in before waking us.
		 */
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		/* wait ended with an error: unlink ourselves under the lock */
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
1387 
1388 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1389 /*
1390  * Check if there is another lock that prevents us to set the lock (posix
1391  * style). If such a lock exists, update the flock structure with its
1392  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1393  * or leave it the same if we can't. Returns 0 if we don't need to request to
1394  * the server or 1 otherwise.
1395  */
1396 static int
1397 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1398 {
1399 	int rc = 0;
1400 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1401 	unsigned char saved_type = flock->fl_type;
1402 
1403 	if ((flock->fl_flags & FL_POSIX) == 0)
1404 		return 1;
1405 
1406 	down_read(&cinode->lock_sem);
1407 	posix_test_lock(file, flock);
1408 
1409 	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1410 		flock->fl_type = saved_type;
1411 		rc = 1;
1412 	}
1413 
1414 	up_read(&cinode->lock_sem);
1415 	return rc;
1416 }
1417 
1418 /*
1419  * Set the byte-range lock (posix style). Returns:
1420  * 1) <0, if the error occurs while setting the lock;
1421  * 2) 0, if we set the lock and don't need to request to the server;
1422  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1423  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1424  */
1425 static int
1426 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1427 {
1428 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1429 	int rc = FILE_LOCK_DEFERRED + 1;
1430 
1431 	if ((flock->fl_flags & FL_POSIX) == 0)
1432 		return rc;
1433 
1434 	cifs_down_write(&cinode->lock_sem);
1435 	if (!cinode->can_cache_brlcks) {
1436 		up_write(&cinode->lock_sem);
1437 		return rc;
1438 	}
1439 
1440 	rc = posix_lock_file(file, flock, NULL);
1441 	up_write(&cinode->lock_sem);
1442 	return rc;
1443 }
1444 
1445 int
1446 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1447 {
1448 	unsigned int xid;
1449 	int rc = 0, stored_rc;
1450 	struct cifsLockInfo *li, *tmp;
1451 	struct cifs_tcon *tcon;
1452 	unsigned int num, max_num, max_buf;
1453 	LOCKING_ANDX_RANGE *buf, *cur;
1454 	static const int types[] = {
1455 		LOCKING_ANDX_LARGE_FILES,
1456 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1457 	};
1458 	int i;
1459 
1460 	xid = get_xid();
1461 	tcon = tlink_tcon(cfile->tlink);
1462 
1463 	/*
1464 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1465 	 * and check it before using.
1466 	 */
1467 	max_buf = tcon->ses->server->maxBuf;
1468 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1469 		free_xid(xid);
1470 		return -EINVAL;
1471 	}
1472 
1473 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1474 		     PAGE_SIZE);
1475 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1476 			PAGE_SIZE);
1477 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1478 						sizeof(LOCKING_ANDX_RANGE);
1479 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1480 	if (!buf) {
1481 		free_xid(xid);
1482 		return -ENOMEM;
1483 	}
1484 
1485 	for (i = 0; i < 2; i++) {
1486 		cur = buf;
1487 		num = 0;
1488 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1489 			if (li->type != types[i])
1490 				continue;
1491 			cur->Pid = cpu_to_le16(li->pid);
1492 			cur->LengthLow = cpu_to_le32((u32)li->length);
1493 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1494 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1495 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1496 			if (++num == max_num) {
1497 				stored_rc = cifs_lockv(xid, tcon,
1498 						       cfile->fid.netfid,
1499 						       (__u8)li->type, 0, num,
1500 						       buf);
1501 				if (stored_rc)
1502 					rc = stored_rc;
1503 				cur = buf;
1504 				num = 0;
1505 			} else
1506 				cur++;
1507 		}
1508 
1509 		if (num) {
1510 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1511 					       (__u8)types[i], 0, num, buf);
1512 			if (stored_rc)
1513 				rc = stored_rc;
1514 		}
1515 	}
1516 
1517 	kfree(buf);
1518 	free_xid(xid);
1519 	return rc;
1520 }
1521 
1522 static __u32
1523 hash_lockowner(fl_owner_t owner)
1524 {
1525 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1526 }
1527 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1528 
/*
 * Snapshot of one POSIX lock that is to be sent to the server; filled in by
 * cifs_push_posix_locks() while it holds the lock-context spinlock and sent
 * afterwards.
 */
struct lock_to_push {
	struct list_head llist;	/* link in the list of locks to send */
	__u64 offset;		/* start of the locked range */
	__u64 length;		/* length of the locked range */
	__u32 pid;		/* hashed lock owner (see hash_lockowner) */
	__u16 netfid;		/* file id the lock is sent on */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1537 
1538 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1539 static int
1540 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1541 {
1542 	struct inode *inode = d_inode(cfile->dentry);
1543 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1544 	struct file_lock *flock;
1545 	struct file_lock_context *flctx = locks_inode_context(inode);
1546 	unsigned int count = 0, i;
1547 	int rc = 0, xid, type;
1548 	struct list_head locks_to_send, *el;
1549 	struct lock_to_push *lck, *tmp;
1550 	__u64 length;
1551 
1552 	xid = get_xid();
1553 
1554 	if (!flctx)
1555 		goto out;
1556 
1557 	spin_lock(&flctx->flc_lock);
1558 	list_for_each(el, &flctx->flc_posix) {
1559 		count++;
1560 	}
1561 	spin_unlock(&flctx->flc_lock);
1562 
1563 	INIT_LIST_HEAD(&locks_to_send);
1564 
1565 	/*
1566 	 * Allocating count locks is enough because no FL_POSIX locks can be
1567 	 * added to the list while we are holding cinode->lock_sem that
1568 	 * protects locking operations of this inode.
1569 	 */
1570 	for (i = 0; i < count; i++) {
1571 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1572 		if (!lck) {
1573 			rc = -ENOMEM;
1574 			goto err_out;
1575 		}
1576 		list_add_tail(&lck->llist, &locks_to_send);
1577 	}
1578 
1579 	el = locks_to_send.next;
1580 	spin_lock(&flctx->flc_lock);
1581 	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1582 		if (el == &locks_to_send) {
1583 			/*
1584 			 * The list ended. We don't have enough allocated
1585 			 * structures - something is really wrong.
1586 			 */
1587 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1588 			break;
1589 		}
1590 		length = cifs_flock_len(flock);
1591 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1592 			type = CIFS_RDLCK;
1593 		else
1594 			type = CIFS_WRLCK;
1595 		lck = list_entry(el, struct lock_to_push, llist);
1596 		lck->pid = hash_lockowner(flock->fl_owner);
1597 		lck->netfid = cfile->fid.netfid;
1598 		lck->length = length;
1599 		lck->type = type;
1600 		lck->offset = flock->fl_start;
1601 	}
1602 	spin_unlock(&flctx->flc_lock);
1603 
1604 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1605 		int stored_rc;
1606 
1607 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1608 					     lck->offset, lck->length, NULL,
1609 					     lck->type, 0);
1610 		if (stored_rc)
1611 			rc = stored_rc;
1612 		list_del(&lck->llist);
1613 		kfree(lck);
1614 	}
1615 
1616 out:
1617 	free_xid(xid);
1618 	return rc;
1619 err_out:
1620 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1621 		list_del(&lck->llist);
1622 		kfree(lck);
1623 	}
1624 	goto out;
1625 }
1626 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1627 
/*
 * Push the byte-range locks cached for @cfile to the server and stop
 * caching brlocks on the inode.
 *
 * Does nothing and returns 0 if brlocks are not currently cached. Otherwise
 * the locks are pushed either as POSIX locks (legacy builds only, when the
 * session and mount allow POSIX byte-range locks) or through the server's
 * push_mand_locks op. can_cache_brlcks is cleared afterwards regardless of
 * the result, and the push result is returned.
 */
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		/* body of the else above in legacy builds, unconditional otherwise */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
1658 
1659 static void
1660 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1661 		bool *wait_flag, struct TCP_Server_Info *server)
1662 {
1663 	if (flock->fl_flags & FL_POSIX)
1664 		cifs_dbg(FYI, "Posix\n");
1665 	if (flock->fl_flags & FL_FLOCK)
1666 		cifs_dbg(FYI, "Flock\n");
1667 	if (flock->fl_flags & FL_SLEEP) {
1668 		cifs_dbg(FYI, "Blocking lock\n");
1669 		*wait_flag = true;
1670 	}
1671 	if (flock->fl_flags & FL_ACCESS)
1672 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1673 	if (flock->fl_flags & FL_LEASE)
1674 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1675 	if (flock->fl_flags &
1676 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1677 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1678 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1679 
1680 	*type = server->vals->large_lock_type;
1681 	if (flock->fl_type == F_WRLCK) {
1682 		cifs_dbg(FYI, "F_WRLCK\n");
1683 		*type |= server->vals->exclusive_lock_type;
1684 		*lock = 1;
1685 	} else if (flock->fl_type == F_UNLCK) {
1686 		cifs_dbg(FYI, "F_UNLCK\n");
1687 		*type |= server->vals->unlock_lock_type;
1688 		*unlock = 1;
1689 		/* Check if unlock includes more than one lock range */
1690 	} else if (flock->fl_type == F_RDLCK) {
1691 		cifs_dbg(FYI, "F_RDLCK\n");
1692 		*type |= server->vals->shared_lock_type;
1693 		*lock = 1;
1694 	} else if (flock->fl_type == F_EXLCK) {
1695 		cifs_dbg(FYI, "F_EXLCK\n");
1696 		*type |= server->vals->exclusive_lock_type;
1697 		*lock = 1;
1698 	} else if (flock->fl_type == F_SHLCK) {
1699 		cifs_dbg(FYI, "F_SHLCK\n");
1700 		*type |= server->vals->shared_lock_type;
1701 		*lock = 1;
1702 	} else
1703 		cifs_dbg(FYI, "Unknown type of lock\n");
1704 }
1705 
/*
 * Handle a lock test (F_GETLK style) request.
 *
 * The answer is taken from locally known locks when possible. Otherwise the
 * server is probed: for POSIX locks (legacy builds) with a single
 * CIFSSMBPosixLock() query; for mandatory locks by trying to take the lock
 * and, if that works, releasing it again straight away.
 *
 * On return @flock describes the result: F_UNLCK if the range could be
 * locked, otherwise F_RDLCK or F_WRLCK for the kind of lock in the way.
 * Returns 0 except on the POSIX path, where the result of
 * CIFSSMBPosixLock() is returned.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = cifs_flock_len(flock);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* 0 means the local lock state already answered the query */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* 0 means the locally known locks already answered the query */
	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	/* probe: try to take the requested lock on the server ... */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		/* ... it was granted, so release it again and report F_UNLCK */
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	/* a shared request was refused: report a write lock in the way */
	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/*
	 * An exclusive request was refused. Probe again with a shared lock
	 * to tell a read lock from a write lock as the blocker.
	 */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1776 
1777 void
1778 cifs_move_llist(struct list_head *source, struct list_head *dest)
1779 {
1780 	struct list_head *li, *tmp;
1781 	list_for_each_safe(li, tmp, source)
1782 		list_move(li, dest);
1783 }
1784 
1785 void
1786 cifs_free_llist(struct list_head *llist)
1787 {
1788 	struct cifsLockInfo *li, *tmp;
1789 	list_for_each_entry_safe(li, tmp, llist, llist) {
1790 		cifs_del_lock_waiters(li);
1791 		list_del(&li->llist);
1792 		kfree(li);
1793 	}
1794 }
1795 
1796 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1797 int
1798 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1799 		  unsigned int xid)
1800 {
1801 	int rc = 0, stored_rc;
1802 	static const int types[] = {
1803 		LOCKING_ANDX_LARGE_FILES,
1804 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1805 	};
1806 	unsigned int i;
1807 	unsigned int max_num, num, max_buf;
1808 	LOCKING_ANDX_RANGE *buf, *cur;
1809 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1810 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1811 	struct cifsLockInfo *li, *tmp;
1812 	__u64 length = cifs_flock_len(flock);
1813 	struct list_head tmp_llist;
1814 
1815 	INIT_LIST_HEAD(&tmp_llist);
1816 
1817 	/*
1818 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1819 	 * and check it before using.
1820 	 */
1821 	max_buf = tcon->ses->server->maxBuf;
1822 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1823 		return -EINVAL;
1824 
1825 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1826 		     PAGE_SIZE);
1827 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1828 			PAGE_SIZE);
1829 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1830 						sizeof(LOCKING_ANDX_RANGE);
1831 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1832 	if (!buf)
1833 		return -ENOMEM;
1834 
1835 	cifs_down_write(&cinode->lock_sem);
1836 	for (i = 0; i < 2; i++) {
1837 		cur = buf;
1838 		num = 0;
1839 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1840 			if (flock->fl_start > li->offset ||
1841 			    (flock->fl_start + length) <
1842 			    (li->offset + li->length))
1843 				continue;
1844 			if (current->tgid != li->pid)
1845 				continue;
1846 			if (types[i] != li->type)
1847 				continue;
1848 			if (cinode->can_cache_brlcks) {
1849 				/*
1850 				 * We can cache brlock requests - simply remove
1851 				 * a lock from the file's list.
1852 				 */
1853 				list_del(&li->llist);
1854 				cifs_del_lock_waiters(li);
1855 				kfree(li);
1856 				continue;
1857 			}
1858 			cur->Pid = cpu_to_le16(li->pid);
1859 			cur->LengthLow = cpu_to_le32((u32)li->length);
1860 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1861 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1862 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1863 			/*
1864 			 * We need to save a lock here to let us add it again to
1865 			 * the file's list if the unlock range request fails on
1866 			 * the server.
1867 			 */
1868 			list_move(&li->llist, &tmp_llist);
1869 			if (++num == max_num) {
1870 				stored_rc = cifs_lockv(xid, tcon,
1871 						       cfile->fid.netfid,
1872 						       li->type, num, 0, buf);
1873 				if (stored_rc) {
1874 					/*
1875 					 * We failed on the unlock range
1876 					 * request - add all locks from the tmp
1877 					 * list to the head of the file's list.
1878 					 */
1879 					cifs_move_llist(&tmp_llist,
1880 							&cfile->llist->locks);
1881 					rc = stored_rc;
1882 				} else
1883 					/*
1884 					 * The unlock range request succeed -
1885 					 * free the tmp list.
1886 					 */
1887 					cifs_free_llist(&tmp_llist);
1888 				cur = buf;
1889 				num = 0;
1890 			} else
1891 				cur++;
1892 		}
1893 		if (num) {
1894 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1895 					       types[i], num, 0, buf);
1896 			if (stored_rc) {
1897 				cifs_move_llist(&tmp_llist,
1898 						&cfile->llist->locks);
1899 				rc = stored_rc;
1900 			} else
1901 				cifs_free_llist(&tmp_llist);
1902 		}
1903 	}
1904 
1905 	up_write(&cinode->lock_sem);
1906 	kfree(buf);
1907 	return rc;
1908 }
1909 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1910 
/*
 * Set or remove a byte-range lock.
 *
 * @type:      server lock type built by cifs_read_flock()
 * @wait_flag: whether the request may block
 * @posix_lck: use POSIX byte-range locks (only acted on in legacy builds)
 * @lock:      non-zero for a lock request
 * @unlock:    non-zero for an unlock request
 *
 * For FL_POSIX and FL_FLOCK requests the lock is also recorded in the local
 * VFS lock state via locks_lock_file_wait() once the CIFS side succeeded
 * (or failed on an FL_CLOSE request).
 *
 * Returns 0 on success or a negative error; on the POSIX path
 * FILE_LOCK_DEFERRED may also be returned from cifs_posix_lock_set().
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = cifs_flock_len(flock);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (posix_lck) {
		int posix_lock_type;

		/*
		 * Anything up to FILE_LOCK_DEFERRED was fully handled
		 * locally; only FILE_LOCK_DEFERRED + 1 asks for the server.
		 */
		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
	if (lock) {
		/* NOTE(review): this local shadows the int parameter "lock" */
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		/*
		 * <0: conflict or interrupted wait - the lock was not added;
		 *  0: lock recorded locally, no server request needed;
		 *  1: no local conflict, but the server must be asked.
		 */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		/* the server granted the lock: record it on the file's list */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
2005 
2006 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2007 {
2008 	int rc, xid;
2009 	int lock = 0, unlock = 0;
2010 	bool wait_flag = false;
2011 	bool posix_lck = false;
2012 	struct cifs_sb_info *cifs_sb;
2013 	struct cifs_tcon *tcon;
2014 	struct cifsFileInfo *cfile;
2015 	__u32 type;
2016 
2017 	xid = get_xid();
2018 
2019 	if (!(fl->fl_flags & FL_FLOCK)) {
2020 		rc = -ENOLCK;
2021 		free_xid(xid);
2022 		return rc;
2023 	}
2024 
2025 	cfile = (struct cifsFileInfo *)file->private_data;
2026 	tcon = tlink_tcon(cfile->tlink);
2027 
2028 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2029 			tcon->ses->server);
2030 	cifs_sb = CIFS_FILE_SB(file);
2031 
2032 	if (cap_unix(tcon->ses) &&
2033 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2034 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2035 		posix_lck = true;
2036 
2037 	if (!lock && !unlock) {
2038 		/*
2039 		 * if no lock or unlock then nothing to do since we do not
2040 		 * know what it is
2041 		 */
2042 		rc = -EOPNOTSUPP;
2043 		free_xid(xid);
2044 		return rc;
2045 	}
2046 
2047 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2048 			xid);
2049 	free_xid(xid);
2050 	return rc;
2051 
2052 
2053 }
2054 
2055 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2056 {
2057 	int rc, xid;
2058 	int lock = 0, unlock = 0;
2059 	bool wait_flag = false;
2060 	bool posix_lck = false;
2061 	struct cifs_sb_info *cifs_sb;
2062 	struct cifs_tcon *tcon;
2063 	struct cifsFileInfo *cfile;
2064 	__u32 type;
2065 
2066 	rc = -EACCES;
2067 	xid = get_xid();
2068 
2069 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2070 		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2071 		 (long long)flock->fl_end);
2072 
2073 	cfile = (struct cifsFileInfo *)file->private_data;
2074 	tcon = tlink_tcon(cfile->tlink);
2075 
2076 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2077 			tcon->ses->server);
2078 	cifs_sb = CIFS_FILE_SB(file);
2079 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2080 
2081 	if (cap_unix(tcon->ses) &&
2082 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2083 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2084 		posix_lck = true;
2085 	/*
2086 	 * BB add code here to normalize offset and length to account for
2087 	 * negative length which we can not accept over the wire.
2088 	 */
2089 	if (IS_GETLK(cmd)) {
2090 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2091 		free_xid(xid);
2092 		return rc;
2093 	}
2094 
2095 	if (!lock && !unlock) {
2096 		/*
2097 		 * if no lock or unlock then nothing to do since we do not
2098 		 * know what it is
2099 		 */
2100 		free_xid(xid);
2101 		return -EOPNOTSUPP;
2102 	}
2103 
2104 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2105 			xid);
2106 	free_xid(xid);
2107 	return rc;
2108 }
2109 
2110 /*
2111  * update the file size (if needed) after a write. Should be called with
2112  * the inode->i_lock held
2113  */
2114 void
2115 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2116 		      unsigned int bytes_written)
2117 {
2118 	loff_t end_of_write = offset + bytes_written;
2119 
2120 	if (end_of_write > cifsi->server_eof)
2121 		cifsi->server_eof = end_of_write;
2122 }
2123 
2124 static ssize_t
2125 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2126 	   size_t write_size, loff_t *offset)
2127 {
2128 	int rc = 0;
2129 	unsigned int bytes_written = 0;
2130 	unsigned int total_written;
2131 	struct cifs_tcon *tcon;
2132 	struct TCP_Server_Info *server;
2133 	unsigned int xid;
2134 	struct dentry *dentry = open_file->dentry;
2135 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2136 	struct cifs_io_parms io_parms = {0};
2137 
2138 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2139 		 write_size, *offset, dentry);
2140 
2141 	tcon = tlink_tcon(open_file->tlink);
2142 	server = tcon->ses->server;
2143 
2144 	if (!server->ops->sync_write)
2145 		return -ENOSYS;
2146 
2147 	xid = get_xid();
2148 
2149 	for (total_written = 0; write_size > total_written;
2150 	     total_written += bytes_written) {
2151 		rc = -EAGAIN;
2152 		while (rc == -EAGAIN) {
2153 			struct kvec iov[2];
2154 			unsigned int len;
2155 
2156 			if (open_file->invalidHandle) {
2157 				/* we could deadlock if we called
2158 				   filemap_fdatawait from here so tell
2159 				   reopen_file not to flush data to
2160 				   server now */
2161 				rc = cifs_reopen_file(open_file, false);
2162 				if (rc != 0)
2163 					break;
2164 			}
2165 
2166 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2167 				  (unsigned int)write_size - total_written);
2168 			/* iov[0] is reserved for smb header */
2169 			iov[1].iov_base = (char *)write_data + total_written;
2170 			iov[1].iov_len = len;
2171 			io_parms.pid = pid;
2172 			io_parms.tcon = tcon;
2173 			io_parms.offset = *offset;
2174 			io_parms.length = len;
2175 			rc = server->ops->sync_write(xid, &open_file->fid,
2176 					&io_parms, &bytes_written, iov, 1);
2177 		}
2178 		if (rc || (bytes_written == 0)) {
2179 			if (total_written)
2180 				break;
2181 			else {
2182 				free_xid(xid);
2183 				return rc;
2184 			}
2185 		} else {
2186 			spin_lock(&d_inode(dentry)->i_lock);
2187 			cifs_update_eof(cifsi, *offset, bytes_written);
2188 			spin_unlock(&d_inode(dentry)->i_lock);
2189 			*offset += bytes_written;
2190 		}
2191 	}
2192 
2193 	cifs_stats_bytes_written(tcon, total_written);
2194 
2195 	if (total_written > 0) {
2196 		spin_lock(&d_inode(dentry)->i_lock);
2197 		if (*offset > d_inode(dentry)->i_size) {
2198 			i_size_write(d_inode(dentry), *offset);
2199 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2200 		}
2201 		spin_unlock(&d_inode(dentry)->i_lock);
2202 	}
2203 	mark_inode_dirty_sync(d_inode(dentry));
2204 	free_xid(xid);
2205 	return total_written;
2206 }
2207 
2208 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2209 					bool fsuid_only)
2210 {
2211 	struct cifsFileInfo *open_file = NULL;
2212 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2213 
2214 	/* only filter by fsuid on multiuser mounts */
2215 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2216 		fsuid_only = false;
2217 
2218 	spin_lock(&cifs_inode->open_file_lock);
2219 	/* we could simply get the first_list_entry since write-only entries
2220 	   are always at the end of the list but since the first entry might
2221 	   have a close pending, we go through the whole list */
2222 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2223 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2224 			continue;
2225 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2226 			if ((!open_file->invalidHandle)) {
2227 				/* found a good file */
2228 				/* lock it so it will not be closed on us */
2229 				cifsFileInfo_get(open_file);
2230 				spin_unlock(&cifs_inode->open_file_lock);
2231 				return open_file;
2232 			} /* else might as well continue, and look for
2233 			     another, or simply have the caller reopen it
2234 			     again rather than trying to fix this handle */
2235 		} else /* write only file */
2236 			break; /* write only files are last so must be done */
2237 	}
2238 	spin_unlock(&cifs_inode->open_file_lock);
2239 	return NULL;
2240 }
2241 
/*
 * Find an open handle on @cifs_inode that is usable for writing and hand it
 * back through @ret_file after calling cifsFileInfo_get() on it.
 *
 * @flags: FIND_WR_FSUID_ONLY limits the search to handles whose uid equals
 *         the current fsuid (honoured only on multiuser mounts);
 *         FIND_WR_WITH_DELETE skips handles lacking DELETE access.
 *
 * The list is first searched for handles opened by the current tgid and then
 * for any handle.  If only an invalid handle was found, it is reopened; on
 * failure it is moved to the tail of the list and the search is repeated,
 * giving up once the retry count exceeds MAX_REOPEN_ATT.
 *
 * Return -EBADF if no handle is found and general rc otherwise
 */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of an oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	/* entered with open_file_lock held; bound the number of reopen retries */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass: only handles opened by the current tgid */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				/* remember the first invalid candidate */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		/* restart with the same-tgid pass if the reopen below fails */
		any_available = false;
		/* taken before the lock is dropped for the reopen */
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		/* reopen failed: push the handle to the tail and search again */
		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}
2332 
2333 struct cifsFileInfo *
2334 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2335 {
2336 	struct cifsFileInfo *cfile;
2337 	int rc;
2338 
2339 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2340 	if (rc)
2341 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2342 
2343 	return cfile;
2344 }
2345 
2346 int
2347 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2348 		       int flags,
2349 		       struct cifsFileInfo **ret_file)
2350 {
2351 	struct cifsFileInfo *cfile;
2352 	void *page = alloc_dentry_path();
2353 
2354 	*ret_file = NULL;
2355 
2356 	spin_lock(&tcon->open_file_lock);
2357 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2358 		struct cifsInodeInfo *cinode;
2359 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2360 		if (IS_ERR(full_path)) {
2361 			spin_unlock(&tcon->open_file_lock);
2362 			free_dentry_path(page);
2363 			return PTR_ERR(full_path);
2364 		}
2365 		if (strcmp(full_path, name))
2366 			continue;
2367 
2368 		cinode = CIFS_I(d_inode(cfile->dentry));
2369 		spin_unlock(&tcon->open_file_lock);
2370 		free_dentry_path(page);
2371 		return cifs_get_writable_file(cinode, flags, ret_file);
2372 	}
2373 
2374 	spin_unlock(&tcon->open_file_lock);
2375 	free_dentry_path(page);
2376 	return -ENOENT;
2377 }
2378 
2379 int
2380 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2381 		       struct cifsFileInfo **ret_file)
2382 {
2383 	struct cifsFileInfo *cfile;
2384 	void *page = alloc_dentry_path();
2385 
2386 	*ret_file = NULL;
2387 
2388 	spin_lock(&tcon->open_file_lock);
2389 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2390 		struct cifsInodeInfo *cinode;
2391 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2392 		if (IS_ERR(full_path)) {
2393 			spin_unlock(&tcon->open_file_lock);
2394 			free_dentry_path(page);
2395 			return PTR_ERR(full_path);
2396 		}
2397 		if (strcmp(full_path, name))
2398 			continue;
2399 
2400 		cinode = CIFS_I(d_inode(cfile->dentry));
2401 		spin_unlock(&tcon->open_file_lock);
2402 		free_dentry_path(page);
2403 		*ret_file = find_readable_file(cinode, 0);
2404 		return *ret_file ? 0 : -ENOENT;
2405 	}
2406 
2407 	spin_unlock(&tcon->open_file_lock);
2408 	free_dentry_path(page);
2409 	return -ENOENT;
2410 }
2411 
2412 void
2413 cifs_writedata_release(struct kref *refcount)
2414 {
2415 	struct cifs_writedata *wdata = container_of(refcount,
2416 					struct cifs_writedata, refcount);
2417 #ifdef CONFIG_CIFS_SMB_DIRECT
2418 	if (wdata->mr) {
2419 		smbd_deregister_mr(wdata->mr);
2420 		wdata->mr = NULL;
2421 	}
2422 #endif
2423 
2424 	if (wdata->cfile)
2425 		cifsFileInfo_put(wdata->cfile);
2426 
2427 	kfree(wdata);
2428 }
2429 
/*
 * Write failed with a retryable error. Resend the write request. It's also
 * possible that the page was redirtied so re-clean the page.
 *
 * The original request is re-sent as one or more new requests, each covering
 * at most wp_retry_size() bytes (rounded down to whole pages when the range
 * has to be split).  A piece that fails with a retryable error is attempted
 * again; any other failure stops the loop.  Whatever part of the range was
 * not handed over is passed to cifs_pages_write_failed(), and the reference
 * on the original @wdata is dropped before returning.
 */
static void
cifs_writev_requeue(struct cifs_writedata *wdata)
{
	int rc = 0;
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct TCP_Server_Info *server;
	unsigned int rest_len = wdata->bytes;
	loff_t fpos = wdata->offset;

	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
	do {
		struct cifs_writedata *wdata2;
		unsigned int wsize, cur_len;

		wsize = server->ops->wp_retry_size(inode);
		if (wsize < rest_len) {
			/* the range must be split; pieces are whole pages */
			if (wsize < PAGE_SIZE) {
				rc = -EOPNOTSUPP;
				break;
			}
			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
		} else {
			cur_len = rest_len;
		}

		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
		if (!wdata2) {
			rc = -ENOMEM;
			break;
		}

		wdata2->sync_mode = wdata->sync_mode;
		wdata2->offset	= fpos;
		wdata2->bytes	= cur_len;
		wdata2->iter	= wdata->iter;

		/* narrow the copied iterator down to just this piece */
		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
		iov_iter_truncate(&wdata2->iter, wdata2->bytes);

		if (iov_iter_is_xarray(&wdata2->iter))
			/* Check for pages having been redirtied and clean
			 * them.  We can do this by walking the xarray.  If
			 * it's not an xarray, then it's a DIO and we shouldn't
			 * be mucking around with the page bits.
			 */
			cifs_undirty_folios(inode, fpos, cur_len);

		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
					    &wdata2->cfile);
		if (!wdata2->cfile) {
			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
				 rc);
			if (!is_retryable_error(rc))
				rc = -EBADF;
		} else {
			wdata2->pid = wdata2->cfile->pid;
			rc = server->ops->async_writev(wdata2,
						       cifs_writedata_release);
		}

		/* drop this function's reference on the new request */
		kref_put(&wdata2->refcount, cifs_writedata_release);
		if (rc) {
			/* retryable: loop again for the same piece */
			if (is_retryable_error(rc))
				continue;
			/* hard failure: step past this piece and stop */
			fpos += cur_len;
			rest_len -= cur_len;
			break;
		}

		fpos += cur_len;
		rest_len -= cur_len;
	} while (rest_len > 0);

	/* Clean up remaining pages from the original wdata */
	if (iov_iter_is_xarray(&wdata->iter))
		cifs_pages_write_failed(inode, fpos, rest_len);

	if (rc != 0 && !is_retryable_error(rc))
		mapping_set_error(inode->i_mapping, rc);
	kref_put(&wdata->refcount, cifs_writedata_release);
}
2515 
2516 void
2517 cifs_writev_complete(struct work_struct *work)
2518 {
2519 	struct cifs_writedata *wdata = container_of(work,
2520 						struct cifs_writedata, work);
2521 	struct inode *inode = d_inode(wdata->cfile->dentry);
2522 
2523 	if (wdata->result == 0) {
2524 		spin_lock(&inode->i_lock);
2525 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2526 		spin_unlock(&inode->i_lock);
2527 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2528 					 wdata->bytes);
2529 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2530 		return cifs_writev_requeue(wdata);
2531 
2532 	if (wdata->result == -EAGAIN)
2533 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2534 	else if (wdata->result < 0)
2535 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2536 	else
2537 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2538 
2539 	if (wdata->result != -EAGAIN)
2540 		mapping_set_error(inode->i_mapping, wdata->result);
2541 	kref_put(&wdata->refcount, cifs_writedata_release);
2542 }
2543 
2544 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2545 {
2546 	struct cifs_writedata *wdata;
2547 
2548 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2549 	if (wdata != NULL) {
2550 		kref_init(&wdata->refcount);
2551 		INIT_LIST_HEAD(&wdata->list);
2552 		init_completion(&wdata->done);
2553 		INIT_WORK(&wdata->work, complete);
2554 	}
2555 	return wdata;
2556 }
2557 
2558 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2559 {
2560 	struct address_space *mapping = page->mapping;
2561 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2562 	char *write_data;
2563 	int rc = -EFAULT;
2564 	int bytes_written = 0;
2565 	struct inode *inode;
2566 	struct cifsFileInfo *open_file;
2567 
2568 	if (!mapping || !mapping->host)
2569 		return -EFAULT;
2570 
2571 	inode = page->mapping->host;
2572 
2573 	offset += (loff_t)from;
2574 	write_data = kmap(page);
2575 	write_data += from;
2576 
2577 	if ((to > PAGE_SIZE) || (from > to)) {
2578 		kunmap(page);
2579 		return -EIO;
2580 	}
2581 
2582 	/* racing with truncate? */
2583 	if (offset > mapping->host->i_size) {
2584 		kunmap(page);
2585 		return 0; /* don't care */
2586 	}
2587 
2588 	/* check to make sure that we are not extending the file */
2589 	if (mapping->host->i_size - offset < (loff_t)to)
2590 		to = (unsigned)(mapping->host->i_size - offset);
2591 
2592 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2593 				    &open_file);
2594 	if (!rc) {
2595 		bytes_written = cifs_write(open_file, open_file->pid,
2596 					   write_data, to - from, &offset);
2597 		cifsFileInfo_put(open_file);
2598 		/* Does mm or vfs already set times? */
2599 		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
2600 		if ((bytes_written > 0) && (offset))
2601 			rc = 0;
2602 		else if (bytes_written < 0)
2603 			rc = bytes_written;
2604 		else
2605 			rc = -EFAULT;
2606 	} else {
2607 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2608 		if (!is_retryable_error(rc))
2609 			rc = -EIO;
2610 	}
2611 
2612 	kunmap(page);
2613 	return rc;
2614 }
2615 
2616 /*
2617  * Extend the region to be written back to include subsequent contiguously
2618  * dirty pages if possible, but don't sleep while doing so.
2619  */
2620 static void cifs_extend_writeback(struct address_space *mapping,
2621 				  long *_count,
2622 				  loff_t start,
2623 				  int max_pages,
2624 				  size_t max_len,
2625 				  unsigned int *_len)
2626 {
2627 	struct folio_batch batch;
2628 	struct folio *folio;
2629 	unsigned int psize, nr_pages;
2630 	size_t len = *_len;
2631 	pgoff_t index = (start + len) / PAGE_SIZE;
2632 	bool stop = true;
2633 	unsigned int i;
2634 	XA_STATE(xas, &mapping->i_pages, index);
2635 
2636 	folio_batch_init(&batch);
2637 
2638 	do {
2639 		/* Firstly, we gather up a batch of contiguous dirty pages
2640 		 * under the RCU read lock - but we can't clear the dirty flags
2641 		 * there if any of those pages are mapped.
2642 		 */
2643 		rcu_read_lock();
2644 
2645 		xas_for_each(&xas, folio, ULONG_MAX) {
2646 			stop = true;
2647 			if (xas_retry(&xas, folio))
2648 				continue;
2649 			if (xa_is_value(folio))
2650 				break;
2651 			if (folio_index(folio) != index)
2652 				break;
2653 			if (!folio_try_get_rcu(folio)) {
2654 				xas_reset(&xas);
2655 				continue;
2656 			}
2657 			nr_pages = folio_nr_pages(folio);
2658 			if (nr_pages > max_pages)
2659 				break;
2660 
2661 			/* Has the page moved or been split? */
2662 			if (unlikely(folio != xas_reload(&xas))) {
2663 				folio_put(folio);
2664 				break;
2665 			}
2666 
2667 			if (!folio_trylock(folio)) {
2668 				folio_put(folio);
2669 				break;
2670 			}
2671 			if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
2672 				folio_unlock(folio);
2673 				folio_put(folio);
2674 				break;
2675 			}
2676 
2677 			max_pages -= nr_pages;
2678 			psize = folio_size(folio);
2679 			len += psize;
2680 			stop = false;
2681 			if (max_pages <= 0 || len >= max_len || *_count <= 0)
2682 				stop = true;
2683 
2684 			index += nr_pages;
2685 			if (!folio_batch_add(&batch, folio))
2686 				break;
2687 			if (stop)
2688 				break;
2689 		}
2690 
2691 		if (!stop)
2692 			xas_pause(&xas);
2693 		rcu_read_unlock();
2694 
2695 		/* Now, if we obtained any pages, we can shift them to being
2696 		 * writable and mark them for caching.
2697 		 */
2698 		if (!folio_batch_count(&batch))
2699 			break;
2700 
2701 		for (i = 0; i < folio_batch_count(&batch); i++) {
2702 			folio = batch.folios[i];
2703 			/* The folio should be locked, dirty and not undergoing
2704 			 * writeback from the loop above.
2705 			 */
2706 			if (!folio_clear_dirty_for_io(folio))
2707 				WARN_ON(1);
2708 			if (folio_start_writeback(folio))
2709 				WARN_ON(1);
2710 
2711 			*_count -= folio_nr_pages(folio);
2712 			folio_unlock(folio);
2713 		}
2714 
2715 		folio_batch_release(&batch);
2716 		cond_resched();
2717 	} while (!stop);
2718 
2719 	*_len = len;
2720 }
2721 
2722 /*
2723  * Write back the locked page and any subsequent non-locked dirty pages.
2724  */
2725 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2726 						 struct writeback_control *wbc,
2727 						 struct folio *folio,
2728 						 loff_t start, loff_t end)
2729 {
2730 	struct inode *inode = mapping->host;
2731 	struct TCP_Server_Info *server;
2732 	struct cifs_writedata *wdata;
2733 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2734 	struct cifs_credits credits_on_stack;
2735 	struct cifs_credits *credits = &credits_on_stack;
2736 	struct cifsFileInfo *cfile = NULL;
2737 	unsigned int xid, wsize, len;
2738 	loff_t i_size = i_size_read(inode);
2739 	size_t max_len;
2740 	long count = wbc->nr_to_write;
2741 	int rc;
2742 
2743 	/* The folio should be locked, dirty and not undergoing writeback. */
2744 	if (folio_start_writeback(folio))
2745 		WARN_ON(1);
2746 
2747 	count -= folio_nr_pages(folio);
2748 	len = folio_size(folio);
2749 
2750 	xid = get_xid();
2751 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2752 
2753 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2754 	if (rc) {
2755 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2756 		goto err_xid;
2757 	}
2758 
2759 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2760 					   &wsize, credits);
2761 	if (rc != 0)
2762 		goto err_close;
2763 
2764 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2765 	if (!wdata) {
2766 		rc = -ENOMEM;
2767 		goto err_uncredit;
2768 	}
2769 
2770 	wdata->sync_mode = wbc->sync_mode;
2771 	wdata->offset = folio_pos(folio);
2772 	wdata->pid = cfile->pid;
2773 	wdata->credits = credits_on_stack;
2774 	wdata->cfile = cfile;
2775 	wdata->server = server;
2776 	cfile = NULL;
2777 
2778 	/* Find all consecutive lockable dirty pages, stopping when we find a
2779 	 * page that is not immediately lockable, is not dirty or is missing,
2780 	 * or we reach the end of the range.
2781 	 */
2782 	if (start < i_size) {
2783 		/* Trim the write to the EOF; the extra data is ignored.  Also
2784 		 * put an upper limit on the size of a single storedata op.
2785 		 */
2786 		max_len = wsize;
2787 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2788 		max_len = min_t(unsigned long long, max_len, i_size - start);
2789 
2790 		if (len < max_len) {
2791 			int max_pages = INT_MAX;
2792 
2793 #ifdef CONFIG_CIFS_SMB_DIRECT
2794 			if (server->smbd_conn)
2795 				max_pages = server->smbd_conn->max_frmr_depth;
2796 #endif
2797 			max_pages -= folio_nr_pages(folio);
2798 
2799 			if (max_pages > 0)
2800 				cifs_extend_writeback(mapping, &count, start,
2801 						      max_pages, max_len, &len);
2802 		}
2803 		len = min_t(loff_t, len, max_len);
2804 	}
2805 
2806 	wdata->bytes = len;
2807 
2808 	/* We now have a contiguous set of dirty pages, each with writeback
2809 	 * set; the first page is still locked at this point, but all the rest
2810 	 * have been unlocked.
2811 	 */
2812 	folio_unlock(folio);
2813 
2814 	if (start < i_size) {
2815 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2816 				start, len);
2817 
2818 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2819 		if (rc)
2820 			goto err_wdata;
2821 
2822 		if (wdata->cfile->invalidHandle)
2823 			rc = -EAGAIN;
2824 		else
2825 			rc = wdata->server->ops->async_writev(wdata,
2826 							      cifs_writedata_release);
2827 		if (rc >= 0) {
2828 			kref_put(&wdata->refcount, cifs_writedata_release);
2829 			goto err_close;
2830 		}
2831 	} else {
2832 		/* The dirty region was entirely beyond the EOF. */
2833 		cifs_pages_written_back(inode, start, len);
2834 		rc = 0;
2835 	}
2836 
2837 err_wdata:
2838 	kref_put(&wdata->refcount, cifs_writedata_release);
2839 err_uncredit:
2840 	add_credits_and_wake_if(server, credits, 0);
2841 err_close:
2842 	if (cfile)
2843 		cifsFileInfo_put(cfile);
2844 err_xid:
2845 	free_xid(xid);
2846 	if (rc == 0) {
2847 		wbc->nr_to_write = count;
2848 		rc = len;
2849 	} else if (is_retryable_error(rc)) {
2850 		cifs_pages_write_redirty(inode, start, len);
2851 	} else {
2852 		cifs_pages_write_failed(inode, start, len);
2853 		mapping_set_error(mapping, rc);
2854 	}
2855 	/* Indication to update ctime and mtime as close is deferred */
2856 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2857 	return rc;
2858 }
2859 
/*
 * write a region of pages back to the server
 *
 * Dirty-tagged folios between @start and @end are looked up in batches and
 * each one is locked, rechecked and handed to
 * cifs_write_back_from_locked_folio().  The loop continues while
 * wbc->nr_to_write is positive and dirty folios are still found.
 *
 * On return *_next holds the position at which the scan stopped.  Returns 0
 * on completion or when the scan was abandoned after skipping folios, and a
 * negative error if locking or writing a folio failed.
 */
static int cifs_writepages_region(struct address_space *mapping,
				  struct writeback_control *wbc,
				  loff_t start, loff_t end, loff_t *_next)
{
	struct folio_batch fbatch;
	int skips = 0;

	folio_batch_init(&fbatch);
	do {
		int nr;
		pgoff_t index = start / PAGE_SIZE;

		nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
					    PAGECACHE_TAG_DIRTY, &fbatch);
		if (!nr)
			break;

		for (int i = 0; i < nr; i++) {
			ssize_t ret;
			struct folio *folio = fbatch.folios[i];

redo_folio:
			start = folio_pos(folio); /* May regress with THPs */

			/* At this point we hold neither the i_pages lock nor the
			 * page lock: the page may be truncated or invalidated
			 * (changing page->mapping to NULL), or even swizzled
			 * back from swapper_space to tmpfs file mapping
			 */
			if (wbc->sync_mode != WB_SYNC_NONE) {
				ret = folio_lock_killable(folio);
				if (ret < 0)
					goto write_error;
			} else {
				/* WB_SYNC_NONE writeback never waits for a lock */
				if (!folio_trylock(folio))
					goto skip_write;
			}

			/* no longer ours, or cleaned since the lookup: move on */
			if (folio_mapping(folio) != mapping ||
			    !folio_test_dirty(folio)) {
				start += folio_size(folio);
				folio_unlock(folio);
				continue;
			}

			if (folio_test_writeback(folio) ||
			    folio_test_fscache(folio)) {
				folio_unlock(folio);
				if (wbc->sync_mode == WB_SYNC_NONE)
					goto skip_write;

				/* wait, then re-examine this folio from the top */
				folio_wait_writeback(folio);
#ifdef CONFIG_CIFS_FSCACHE
				folio_wait_fscache(folio);
#endif
				goto redo_folio;
			}

			if (!folio_clear_dirty_for_io(folio))
				/* We hold the page lock - it should've been dirty. */
				WARN_ON(1);

			ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
			if (ret < 0)
				goto write_error;

			/* ret is the number of bytes that were covered */
			start += ret;
			continue;

write_error:
			/* also reached from skip_write with ret == 0 */
			folio_batch_release(&fbatch);
			*_next = start;
			return ret;

skip_write:
			/*
			 * Too many skipped writes, or need to reschedule?
			 * Treat it as a write error without an error code.
			 */
			if (skips >= 5 || need_resched()) {
				ret = 0;
				goto write_error;
			}

			/* Otherwise, just skip that folio and go on to the next */
			skips++;
			start += folio_size(folio);
			continue;
		}

		folio_batch_release(&fbatch);
		cond_resched();
	} while (wbc->nr_to_write > 0);

	*_next = start;
	return 0;
}
2960 
2961 /*
2962  * Write some of the pending data back to the server
2963  */
2964 static int cifs_writepages(struct address_space *mapping,
2965 			   struct writeback_control *wbc)
2966 {
2967 	loff_t start, next;
2968 	int ret;
2969 
2970 	/* We have to be careful as we can end up racing with setattr()
2971 	 * truncating the pagecache since the caller doesn't take a lock here
2972 	 * to prevent it.
2973 	 */
2974 
2975 	if (wbc->range_cyclic) {
2976 		start = mapping->writeback_index * PAGE_SIZE;
2977 		ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
2978 		if (ret == 0) {
2979 			mapping->writeback_index = next / PAGE_SIZE;
2980 			if (start > 0 && wbc->nr_to_write > 0) {
2981 				ret = cifs_writepages_region(mapping, wbc, 0,
2982 							     start, &next);
2983 				if (ret == 0)
2984 					mapping->writeback_index =
2985 						next / PAGE_SIZE;
2986 			}
2987 		}
2988 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
2989 		ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
2990 		if (wbc->nr_to_write > 0 && ret == 0)
2991 			mapping->writeback_index = next / PAGE_SIZE;
2992 	} else {
2993 		ret = cifs_writepages_region(mapping, wbc,
2994 					     wbc->range_start, wbc->range_end, &next);
2995 	}
2996 
2997 	return ret;
2998 }
2999 
3000 static int
3001 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3002 {
3003 	int rc;
3004 	unsigned int xid;
3005 
3006 	xid = get_xid();
3007 /* BB add check for wbc flags */
3008 	get_page(page);
3009 	if (!PageUptodate(page))
3010 		cifs_dbg(FYI, "ppw - page not up to date\n");
3011 
3012 	/*
3013 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3014 	 *
3015 	 * A writepage() implementation always needs to do either this,
3016 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3017 	 * the case of a failure.
3018 	 *
3019 	 * Just unlocking the page will cause the radix tree tag-bits
3020 	 * to fail to update with the state of the page correctly.
3021 	 */
3022 	set_page_writeback(page);
3023 retry_write:
3024 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3025 	if (is_retryable_error(rc)) {
3026 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3027 			goto retry_write;
3028 		redirty_page_for_writepage(wbc, page);
3029 	} else if (rc != 0) {
3030 		SetPageError(page);
3031 		mapping_set_error(page->mapping, rc);
3032 	} else {
3033 		SetPageUptodate(page);
3034 	}
3035 	end_page_writeback(page);
3036 	put_page(page);
3037 	free_xid(xid);
3038 	return rc;
3039 }
3040 
3041 static int cifs_write_end(struct file *file, struct address_space *mapping,
3042 			loff_t pos, unsigned len, unsigned copied,
3043 			struct page *page, void *fsdata)
3044 {
3045 	int rc;
3046 	struct inode *inode = mapping->host;
3047 	struct cifsFileInfo *cfile = file->private_data;
3048 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3049 	struct folio *folio = page_folio(page);
3050 	__u32 pid;
3051 
3052 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3053 		pid = cfile->pid;
3054 	else
3055 		pid = current->tgid;
3056 
3057 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3058 		 page, pos, copied);
3059 
3060 	if (folio_test_checked(folio)) {
3061 		if (copied == len)
3062 			folio_mark_uptodate(folio);
3063 		folio_clear_checked(folio);
3064 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3065 		folio_mark_uptodate(folio);
3066 
3067 	if (!folio_test_uptodate(folio)) {
3068 		char *page_data;
3069 		unsigned offset = pos & (PAGE_SIZE - 1);
3070 		unsigned int xid;
3071 
3072 		xid = get_xid();
3073 		/* this is probably better than directly calling
3074 		   partialpage_write since in this function the file handle is
3075 		   known which we might as well	leverage */
3076 		/* BB check if anything else missing out of ppw
3077 		   such as updating last write time */
3078 		page_data = kmap(page);
3079 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3080 		/* if (rc < 0) should we set writebehind rc? */
3081 		kunmap(page);
3082 
3083 		free_xid(xid);
3084 	} else {
3085 		rc = copied;
3086 		pos += copied;
3087 		set_page_dirty(page);
3088 	}
3089 
3090 	if (rc > 0) {
3091 		spin_lock(&inode->i_lock);
3092 		if (pos > inode->i_size) {
3093 			i_size_write(inode, pos);
3094 			inode->i_blocks = (512 - 1 + pos) >> 9;
3095 		}
3096 		spin_unlock(&inode->i_lock);
3097 	}
3098 
3099 	unlock_page(page);
3100 	put_page(page);
3101 	/* Indication to update ctime and mtime as close is deferred */
3102 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3103 
3104 	return rc;
3105 }
3106 
3107 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3108 		      int datasync)
3109 {
3110 	unsigned int xid;
3111 	int rc = 0;
3112 	struct cifs_tcon *tcon;
3113 	struct TCP_Server_Info *server;
3114 	struct cifsFileInfo *smbfile = file->private_data;
3115 	struct inode *inode = file_inode(file);
3116 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3117 
3118 	rc = file_write_and_wait_range(file, start, end);
3119 	if (rc) {
3120 		trace_cifs_fsync_err(inode->i_ino, rc);
3121 		return rc;
3122 	}
3123 
3124 	xid = get_xid();
3125 
3126 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3127 		 file, datasync);
3128 
3129 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3130 		rc = cifs_zap_mapping(inode);
3131 		if (rc) {
3132 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3133 			rc = 0; /* don't care about it in fsync */
3134 		}
3135 	}
3136 
3137 	tcon = tlink_tcon(smbfile->tlink);
3138 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3139 		server = tcon->ses->server;
3140 		if (server->ops->flush == NULL) {
3141 			rc = -ENOSYS;
3142 			goto strict_fsync_exit;
3143 		}
3144 
3145 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3146 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3147 			if (smbfile) {
3148 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3149 				cifsFileInfo_put(smbfile);
3150 			} else
3151 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3152 		} else
3153 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3154 	}
3155 
3156 strict_fsync_exit:
3157 	free_xid(xid);
3158 	return rc;
3159 }
3160 
3161 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3162 {
3163 	unsigned int xid;
3164 	int rc = 0;
3165 	struct cifs_tcon *tcon;
3166 	struct TCP_Server_Info *server;
3167 	struct cifsFileInfo *smbfile = file->private_data;
3168 	struct inode *inode = file_inode(file);
3169 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3170 
3171 	rc = file_write_and_wait_range(file, start, end);
3172 	if (rc) {
3173 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3174 		return rc;
3175 	}
3176 
3177 	xid = get_xid();
3178 
3179 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3180 		 file, datasync);
3181 
3182 	tcon = tlink_tcon(smbfile->tlink);
3183 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3184 		server = tcon->ses->server;
3185 		if (server->ops->flush == NULL) {
3186 			rc = -ENOSYS;
3187 			goto fsync_exit;
3188 		}
3189 
3190 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3191 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3192 			if (smbfile) {
3193 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3194 				cifsFileInfo_put(smbfile);
3195 			} else
3196 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3197 		} else
3198 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3199 	}
3200 
3201 fsync_exit:
3202 	free_xid(xid);
3203 	return rc;
3204 }
3205 
3206 /*
3207  * As file closes, flush all cached write data for this inode checking
3208  * for write behind errors.
3209  */
3210 int cifs_flush(struct file *file, fl_owner_t id)
3211 {
3212 	struct inode *inode = file_inode(file);
3213 	int rc = 0;
3214 
3215 	if (file->f_mode & FMODE_WRITE)
3216 		rc = filemap_write_and_wait(inode->i_mapping);
3217 
3218 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3219 	if (rc) {
3220 		/* get more nuanced writeback errors */
3221 		rc = filemap_check_wb_err(file->f_mapping, 0);
3222 		trace_cifs_flush_err(inode->i_ino, rc);
3223 	}
3224 	return rc;
3225 }
3226 
3227 static void
3228 cifs_uncached_writedata_release(struct kref *refcount)
3229 {
3230 	struct cifs_writedata *wdata = container_of(refcount,
3231 					struct cifs_writedata, refcount);
3232 
3233 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3234 	cifs_writedata_release(refcount);
3235 }
3236 
3237 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3238 
3239 static void
3240 cifs_uncached_writev_complete(struct work_struct *work)
3241 {
3242 	struct cifs_writedata *wdata = container_of(work,
3243 					struct cifs_writedata, work);
3244 	struct inode *inode = d_inode(wdata->cfile->dentry);
3245 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3246 
3247 	spin_lock(&inode->i_lock);
3248 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3249 	if (cifsi->server_eof > inode->i_size)
3250 		i_size_write(inode, cifsi->server_eof);
3251 	spin_unlock(&inode->i_lock);
3252 
3253 	complete(&wdata->done);
3254 	collect_uncached_write_data(wdata->ctx);
3255 	/* the below call can possibly free the last ref to aio ctx */
3256 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3257 }
3258 
3259 static int
3260 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3261 	struct cifs_aio_ctx *ctx)
3262 {
3263 	unsigned int wsize;
3264 	struct cifs_credits credits;
3265 	int rc;
3266 	struct TCP_Server_Info *server = wdata->server;
3267 
3268 	do {
3269 		if (wdata->cfile->invalidHandle) {
3270 			rc = cifs_reopen_file(wdata->cfile, false);
3271 			if (rc == -EAGAIN)
3272 				continue;
3273 			else if (rc)
3274 				break;
3275 		}
3276 
3277 
3278 		/*
3279 		 * Wait for credits to resend this wdata.
3280 		 * Note: we are attempting to resend the whole wdata not in
3281 		 * segments
3282 		 */
3283 		do {
3284 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3285 						&wsize, &credits);
3286 			if (rc)
3287 				goto fail;
3288 
3289 			if (wsize < wdata->bytes) {
3290 				add_credits_and_wake_if(server, &credits, 0);
3291 				msleep(1000);
3292 			}
3293 		} while (wsize < wdata->bytes);
3294 		wdata->credits = credits;
3295 
3296 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3297 
3298 		if (!rc) {
3299 			if (wdata->cfile->invalidHandle)
3300 				rc = -EAGAIN;
3301 			else {
3302 #ifdef CONFIG_CIFS_SMB_DIRECT
3303 				if (wdata->mr) {
3304 					wdata->mr->need_invalidate = true;
3305 					smbd_deregister_mr(wdata->mr);
3306 					wdata->mr = NULL;
3307 				}
3308 #endif
3309 				rc = server->ops->async_writev(wdata,
3310 					cifs_uncached_writedata_release);
3311 			}
3312 		}
3313 
3314 		/* If the write was successfully sent, we are done */
3315 		if (!rc) {
3316 			list_add_tail(&wdata->list, wdata_list);
3317 			return 0;
3318 		}
3319 
3320 		/* Roll back credits and retry if needed */
3321 		add_credits_and_wake_if(server, &wdata->credits, 0);
3322 	} while (rc == -EAGAIN);
3323 
3324 fail:
3325 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3326 	return rc;
3327 }
3328 
3329 /*
3330  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3331  * size and maximum number of segments.
3332  */
3333 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3334 				     size_t max_segs, unsigned int *_nsegs)
3335 {
3336 	const struct bio_vec *bvecs = iter->bvec;
3337 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3338 	size_t len, span = 0, n = iter->count;
3339 	size_t skip = iter->iov_offset;
3340 
3341 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3342 		return 0;
3343 
3344 	while (n && ix < nbv && skip) {
3345 		len = bvecs[ix].bv_len;
3346 		if (skip < len)
3347 			break;
3348 		skip -= len;
3349 		n -= len;
3350 		ix++;
3351 	}
3352 
3353 	while (n && ix < nbv) {
3354 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3355 		span += len;
3356 		max_size -= len;
3357 		nsegs++;
3358 		ix++;
3359 		if (max_size == 0 || nsegs >= max_segs)
3360 			break;
3361 		skip = 0;
3362 		n -= len;
3363 	}
3364 
3365 	*_nsegs = nsegs;
3366 	return span;
3367 }
3368 
/*
 * Split @len bytes of @from, destined for file offset @fpos, into async
 * write requests and send them.
 *
 * Each iteration obtains credits, sizes a chunk with
 * cifs_limit_bvec_subset(), builds a cifs_writedata for it and issues it via
 * the server's async_writev op.  Requests that were sent are appended to
 * @wdata_list and @from is advanced past them.  Each request takes its own
 * reference on @open_file and on @ctx.
 *
 * Returns 0 if everything was sent, otherwise the error that stopped the
 * loop; requests already on @wdata_list stay there for the caller to collect.
 */
static int
cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len, max_len;
	struct cifs_writedata *wdata;
	pid_t pid;
	struct TCP_Server_Info *server;
	unsigned int xid, max_segs = INT_MAX;

	/* Pick which pid to present to the server for these writes. */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
	xid = get_xid();

#ifdef CONFIG_CIFS_SMB_DIRECT
	/* With SMB Direct, cap the segments per request at max_frmr_depth. */
	if (server->smbd_conn)
		max_segs = server->smbd_conn->max_frmr_depth;
#endif

	do {
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
		unsigned int wsize, nsegs = 0;

		if (signal_pending(current)) {
			rc = -EINTR;
			break;
		}

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			/* "continue" re-tests len > 0 and goes round again */
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc)
			break;

		/* From here on, every exit path must hand the credits back. */
		max_len = min_t(const size_t, len, wsize);
		if (!max_len) {
			rc = -EAGAIN;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
		if (cur_len == 0) {
			rc = -EIO;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->offset	= (__u64)fpos;
		wdata->cfile	= cifsFileInfo_get(open_file);
		wdata->server	= server;
		wdata->pid	= pid;
		wdata->bytes	= cur_len;
		wdata->credits	= credits_on_stack;
		wdata->iter	= *from;
		wdata->ctx	= ctx;
		/* this reference is dropped in cifs_uncached_writedata_release() */
		kref_get(&ctx->refcount);

		/* The request's private iterator covers only this chunk. */
		iov_iter_truncate(&wdata->iter, cur_len);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			/* Not sent: return credits, drop the request, maybe retry. */
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN)
				continue;
			break;
		}

		/* Sent: queue for collection and move on to the next chunk. */
		list_add_tail(&wdata->list, wdata_list);
		iov_iter_advance(from, cur_len);
		fpos += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}
3482 
/*
 * Gather the results of the uncached write requests queued on @ctx.
 *
 * Called from each request's completion work item.  Walks ctx->list in order
 * under ctx->aio_mutex; if a request that still needs checking has not
 * completed yet, the function returns and leaves the rest for a later call.
 * Requests that failed with -EAGAIN are resent.  Once the list has been
 * drained, the total (or the first error) is stored in ctx->rc and the
 * issuer is notified via ki_complete() or ctx->done.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	ssize_t rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* Nothing queued: nothing to collect and nobody to notify. */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* Not finished yet: a later completion will call us again. */
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					/* direct I/O reuses the same wdata */
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					/*
					 * NOTE(review): this treats ctx->iter as
					 * positioned at ctx->pos, but
					 * __cifs_writev() passes &ctx->iter to
					 * cifs_write_from_iter(), which advances
					 * it - confirm the offset is still right.
					 */
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

					/* new requests were built; drop the old one */
					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
				}

				/* Put the resent requests at the head and rescan. */
				list_splice(&tmp_list, &ctx->list);
				goto restart_loop;
			}
		}
		/* Collected, or skipped because of an earlier error: release it. */
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	cifs_stats_bytes_written(tcon, ctx->total_len);
	/* Mark the cached mapping as invalid after the uncached write. */
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	/* If an iocb with a completion callback is attached, call it; else wake the waiter. */
	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
	else
		complete(&ctx->done);
}
3564 
/*
 * Common implementation of the uncached and direct write paths.
 *
 * Validates the write, sets up an aio context holding a reference on the
 * open file and an iterator suitable for use from worker threads, and sends
 * the data as a series of async write requests.  @direct is recorded in the
 * context and selects how a request is resent after -EAGAIN.
 *
 * For an async iocb this returns -EIOCBQUEUED once at least one request has
 * been sent; completion is then reported through ki_complete().  For a sync
 * iocb it waits (killably) for completion, advances ki_pos and returns the
 * number of bytes written, or an error if nothing was written.
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	int rc;

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	/* Only async iocbs are completed through ki_complete(). */
	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;
	ctx->direct_io = direct;
	ctx->nr_pinned_pages = 0;

	if (user_backed_iter(from)) {
		/*
		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
		 * they contain references to the calling process's virtual
		 * memory layout which won't be available in an async worker
		 * thread.  This also takes a pin on every folio involved.
		 */
		rc = netfs_extract_user_iter(from, iov_iter_count(from),
					     &ctx->iter, 0);
		if (rc < 0) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}

		/* A non-negative return is stored as the pinned-page count. */
		ctx->nr_pinned_pages = rc;
		ctx->bv = (void *)ctx->iter.bvec;
		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
		   !is_sync_kiocb(iocb)) {
		/*
		 * If the op is asynchronous, we need to copy the list attached
		 * to a BVEC/KVEC-type iterator, but we assume that the storage
		 * will be pinned by the caller; in any case, we may or may not
		 * be able to pin the pages, so we don't try.
		 */
		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
		if (!ctx->bv) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return -ENOMEM;
		}
	} else {
		/*
		 * Otherwise, we just pass the iterator down as-is and rely on
		 * the caller to make sure the pages referred to by the
		 * iterator don't evaporate.
		 */
		ctx->iter = *from;
	}

	ctx->len = iov_iter_count(&ctx->iter);

	/*
	 * Hold the mutex while issuing requests, because write response
	 * handlers can access ctx as soon as the first request is sent.
	 */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	/* Nothing was sent at all: fail the write outright. */
	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* Async: drop our ref; in-flight requests hold their own refs on ctx. */
	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* Killed while waiting: record -EINTR and report progress so far. */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	/* With no bytes written, return the status instead of a count. */
	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
3687 
3688 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3689 {
3690 	struct file *file = iocb->ki_filp;
3691 
3692 	cifs_revalidate_mapping(file->f_inode);
3693 	return __cifs_writev(iocb, from, true);
3694 }
3695 
3696 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3697 {
3698 	return __cifs_writev(iocb, from, false);
3699 }
3700 
3701 static ssize_t
3702 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3703 {
3704 	struct file *file = iocb->ki_filp;
3705 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3706 	struct inode *inode = file->f_mapping->host;
3707 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3708 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3709 	ssize_t rc;
3710 
3711 	inode_lock(inode);
3712 	/*
3713 	 * We need to hold the sem to be sure nobody modifies lock list
3714 	 * with a brlock that prevents writing.
3715 	 */
3716 	down_read(&cinode->lock_sem);
3717 
3718 	rc = generic_write_checks(iocb, from);
3719 	if (rc <= 0)
3720 		goto out;
3721 
3722 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3723 				     server->vals->exclusive_lock_type, 0,
3724 				     NULL, CIFS_WRITE_OP))
3725 		rc = __generic_file_write_iter(iocb, from);
3726 	else
3727 		rc = -EACCES;
3728 out:
3729 	up_read(&cinode->lock_sem);
3730 	inode_unlock(inode);
3731 
3732 	if (rc > 0)
3733 		rc = generic_write_sync(iocb, rc);
3734 	return rc;
3735 }
3736 
3737 ssize_t
3738 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3739 {
3740 	struct inode *inode = file_inode(iocb->ki_filp);
3741 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3742 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3743 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3744 						iocb->ki_filp->private_data;
3745 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3746 	ssize_t written;
3747 
3748 	written = cifs_get_writer(cinode);
3749 	if (written)
3750 		return written;
3751 
3752 	if (CIFS_CACHE_WRITE(cinode)) {
3753 		if (cap_unix(tcon->ses) &&
3754 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3755 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3756 			written = generic_file_write_iter(iocb, from);
3757 			goto out;
3758 		}
3759 		written = cifs_writev(iocb, from);
3760 		goto out;
3761 	}
3762 	/*
3763 	 * For non-oplocked files in strict cache mode we need to write the data
3764 	 * to the server exactly from the pos to pos+len-1 rather than flush all
3765 	 * affected pages because it may cause a error with mandatory locks on
3766 	 * these pages but not on the region from pos to ppos+len-1.
3767 	 */
3768 	written = cifs_user_writev(iocb, from);
3769 	if (CIFS_CACHE_READ(cinode)) {
3770 		/*
3771 		 * We have read level caching and we have just sent a write
3772 		 * request to the server thus making data in the cache stale.
3773 		 * Zap the cache and set oplock/lease level to NONE to avoid
3774 		 * reading stale data from the cache. All subsequent read
3775 		 * operations will read new data from the server.
3776 		 */
3777 		cifs_zap_mapping(inode);
3778 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3779 			 inode);
3780 		cinode->oplock = 0;
3781 	}
3782 out:
3783 	cifs_put_writer(cinode);
3784 	return written;
3785 }
3786 
3787 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3788 {
3789 	struct cifs_readdata *rdata;
3790 
3791 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3792 	if (rdata) {
3793 		kref_init(&rdata->refcount);
3794 		INIT_LIST_HEAD(&rdata->list);
3795 		init_completion(&rdata->done);
3796 		INIT_WORK(&rdata->work, complete);
3797 	}
3798 
3799 	return rdata;
3800 }
3801 
3802 void
3803 cifs_readdata_release(struct kref *refcount)
3804 {
3805 	struct cifs_readdata *rdata = container_of(refcount,
3806 					struct cifs_readdata, refcount);
3807 
3808 	if (rdata->ctx)
3809 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3810 #ifdef CONFIG_CIFS_SMB_DIRECT
3811 	if (rdata->mr) {
3812 		smbd_deregister_mr(rdata->mr);
3813 		rdata->mr = NULL;
3814 	}
3815 #endif
3816 	if (rdata->cfile)
3817 		cifsFileInfo_put(rdata->cfile);
3818 
3819 	kfree(rdata);
3820 }
3821 
3822 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3823 
3824 static void
3825 cifs_uncached_readv_complete(struct work_struct *work)
3826 {
3827 	struct cifs_readdata *rdata = container_of(work,
3828 						struct cifs_readdata, work);
3829 
3830 	complete(&rdata->done);
3831 	collect_uncached_read_data(rdata->ctx);
3832 	/* the below call can possibly free the last ref to aio ctx */
3833 	kref_put(&rdata->refcount, cifs_readdata_release);
3834 }
3835 
3836 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3837 			struct list_head *rdata_list,
3838 			struct cifs_aio_ctx *ctx)
3839 {
3840 	unsigned int rsize;
3841 	struct cifs_credits credits;
3842 	int rc;
3843 	struct TCP_Server_Info *server;
3844 
3845 	/* XXX: should we pick a new channel here? */
3846 	server = rdata->server;
3847 
3848 	do {
3849 		if (rdata->cfile->invalidHandle) {
3850 			rc = cifs_reopen_file(rdata->cfile, true);
3851 			if (rc == -EAGAIN)
3852 				continue;
3853 			else if (rc)
3854 				break;
3855 		}
3856 
3857 		/*
3858 		 * Wait for credits to resend this rdata.
3859 		 * Note: we are attempting to resend the whole rdata not in
3860 		 * segments
3861 		 */
3862 		do {
3863 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3864 						&rsize, &credits);
3865 
3866 			if (rc)
3867 				goto fail;
3868 
3869 			if (rsize < rdata->bytes) {
3870 				add_credits_and_wake_if(server, &credits, 0);
3871 				msleep(1000);
3872 			}
3873 		} while (rsize < rdata->bytes);
3874 		rdata->credits = credits;
3875 
3876 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3877 		if (!rc) {
3878 			if (rdata->cfile->invalidHandle)
3879 				rc = -EAGAIN;
3880 			else {
3881 #ifdef CONFIG_CIFS_SMB_DIRECT
3882 				if (rdata->mr) {
3883 					rdata->mr->need_invalidate = true;
3884 					smbd_deregister_mr(rdata->mr);
3885 					rdata->mr = NULL;
3886 				}
3887 #endif
3888 				rc = server->ops->async_readv(rdata);
3889 			}
3890 		}
3891 
3892 		/* If the read was successfully sent, we are done */
3893 		if (!rc) {
3894 			/* Add to aio pending list */
3895 			list_add_tail(&rdata->list, rdata_list);
3896 			return 0;
3897 		}
3898 
3899 		/* Roll back credits and retry if needed */
3900 		add_credits_and_wake_if(server, &rdata->credits, 0);
3901 	} while (rc == -EAGAIN);
3902 
3903 fail:
3904 	kref_put(&rdata->refcount, cifs_readdata_release);
3905 	return rc;
3906 }
3907 
3908 static int
3909 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
3910 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3911 		     struct cifs_aio_ctx *ctx)
3912 {
3913 	struct cifs_readdata *rdata;
3914 	unsigned int rsize, nsegs, max_segs = INT_MAX;
3915 	struct cifs_credits credits_on_stack;
3916 	struct cifs_credits *credits = &credits_on_stack;
3917 	size_t cur_len, max_len;
3918 	int rc;
3919 	pid_t pid;
3920 	struct TCP_Server_Info *server;
3921 
3922 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3923 
3924 #ifdef CONFIG_CIFS_SMB_DIRECT
3925 	if (server->smbd_conn)
3926 		max_segs = server->smbd_conn->max_frmr_depth;
3927 #endif
3928 
3929 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3930 		pid = open_file->pid;
3931 	else
3932 		pid = current->tgid;
3933 
3934 	do {
3935 		if (open_file->invalidHandle) {
3936 			rc = cifs_reopen_file(open_file, true);
3937 			if (rc == -EAGAIN)
3938 				continue;
3939 			else if (rc)
3940 				break;
3941 		}
3942 
3943 		if (cifs_sb->ctx->rsize == 0)
3944 			cifs_sb->ctx->rsize =
3945 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3946 							     cifs_sb->ctx);
3947 
3948 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3949 						   &rsize, credits);
3950 		if (rc)
3951 			break;
3952 
3953 		max_len = min_t(size_t, len, rsize);
3954 
3955 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
3956 						 max_segs, &nsegs);
3957 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3958 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
3959 		if (cur_len == 0) {
3960 			rc = -EIO;
3961 			add_credits_and_wake_if(server, credits, 0);
3962 			break;
3963 		}
3964 
3965 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
3966 		if (!rdata) {
3967 			add_credits_and_wake_if(server, credits, 0);
3968 			rc = -ENOMEM;
3969 			break;
3970 		}
3971 
3972 		rdata->server	= server;
3973 		rdata->cfile	= cifsFileInfo_get(open_file);
3974 		rdata->offset	= fpos;
3975 		rdata->bytes	= cur_len;
3976 		rdata->pid	= pid;
3977 		rdata->credits	= credits_on_stack;
3978 		rdata->ctx	= ctx;
3979 		kref_get(&ctx->refcount);
3980 
3981 		rdata->iter	= ctx->iter;
3982 		iov_iter_truncate(&rdata->iter, cur_len);
3983 
3984 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3985 
3986 		if (!rc) {
3987 			if (rdata->cfile->invalidHandle)
3988 				rc = -EAGAIN;
3989 			else
3990 				rc = server->ops->async_readv(rdata);
3991 		}
3992 
3993 		if (rc) {
3994 			add_credits_and_wake_if(server, &rdata->credits, 0);
3995 			kref_put(&rdata->refcount, cifs_readdata_release);
3996 			if (rc == -EAGAIN)
3997 				continue;
3998 			break;
3999 		}
4000 
4001 		list_add_tail(&rdata->list, rdata_list);
4002 		iov_iter_advance(&ctx->iter, cur_len);
4003 		fpos += cur_len;
4004 		len -= cur_len;
4005 	} while (len > 0);
4006 
4007 	return rc;
4008 }
4009 
/*
 * Gather the results of the uncached read requests queued on @ctx.
 *
 * Called from each request's completion work item.  Walks ctx->list in order
 * under ctx->aio_mutex; if a request that still needs checking has not
 * completed yet, the function returns and leaves the rest for a later call.
 * Requests that failed with -EAGAIN are resent.  A short read stops further
 * accounting without being reported as an error.  Once the list has been
 * drained, the total (or the first error) is stored in ctx->rc and the
 * issuer is notified via ki_complete() or ctx->done.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* Nothing queued: nothing to collect and nobody to notify. */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* Not finished yet: a later completion will call us again. */
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					/* Re-request only what has not arrived yet. */
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					/* new requests were built; drop the old one */
					kref_put(&rdata->refcount,
						cifs_readdata_release);
				}

				/* Put the resent requests at the head and rescan. */
				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		/* Collected, or skipped because of an earlier error: release it. */
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	/* If an iocb with a completion callback is attached, call it; else wake the waiter. */
	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
	else
		complete(&ctx->done);
}
4092 
/*
 * Common worker behind cifs_direct_readv() and cifs_user_readv().
 *
 * Allocates a cifs_aio_ctx for the request, prepares ctx->iter from @to
 * (extracting user-backed iterators, duplicating BVEC/KVEC iterators for
 * asynchronous kiocbs, or copying the iterator as-is otherwise) and passes
 * the range starting at iocb->ki_pos to cifs_send_async_read().  When
 * @direct is true the matching pagecache range is written back and waited
 * on before the reads are issued.
 *
 * Returns 0 for an empty iterator, -EIOCBQUEUED for a non-sync kiocb once
 * at least one read has been queued, the number of bytes read (with
 * iocb->ki_pos advanced by the same amount) for a sync kiocb, or a negative
 * errno (-ENOSYS, -ENOMEM, -EAGAIN, -EINTR or whatever the lower layers
 * reported).
 */
4093 static ssize_t __cifs_readv(
4094 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4095 {
4096 	size_t len;
4097 	struct file *file = iocb->ki_filp;
4098 	struct cifs_sb_info *cifs_sb;
4099 	struct cifsFileInfo *cfile;
4100 	struct cifs_tcon *tcon;
4101 	ssize_t rc, total_read = 0;
4102 	loff_t offset = iocb->ki_pos;
4103 	struct cifs_aio_ctx *ctx;
4104 
4105 	len = iov_iter_count(to);
4106 	if (!len)
4107 		return 0;
4108 
4109 	cifs_sb = CIFS_FILE_SB(file);
4110 	cfile = file->private_data;
4111 	tcon = tlink_tcon(cfile->tlink);
4112 
4113 	if (!tcon->ses->server->ops->async_readv)
4114 		return -ENOSYS;
4115 
4116 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4117 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4118 
4119 	ctx = cifs_aio_ctx_alloc();
4120 	if (!ctx)
4121 		return -ENOMEM;
4122 
4123 	ctx->pos	= offset;
4124 	ctx->direct_io	= direct;
4125 	ctx->len	= len;
4126 	ctx->cfile	= cifsFileInfo_get(cfile);
4127 	ctx->nr_pinned_pages = 0;
4128 
	/*
	 * Only asynchronous requests record the iocb; synchronous callers
	 * wait on ctx->done further down instead.
	 */
4129 	if (!is_sync_kiocb(iocb))
4130 		ctx->iocb = iocb;
4131 
4132 	if (user_backed_iter(to)) {
4133 		/*
4134 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4135 		 * they contain references to the calling process's virtual
4136 		 * memory layout which won't be available in an async worker
4137 		 * thread.  This also takes a pin on every folio involved.
4138 		 */
4139 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4140 					     &ctx->iter, 0);
4141 		if (rc < 0) {
4142 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4143 			return rc;
4144 		}
4145 
		/* a non-negative return is stored as the pinned page count */
4146 		ctx->nr_pinned_pages = rc;
4147 		ctx->bv = (void *)ctx->iter.bvec;
4148 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4149 		ctx->should_dirty = true;
4150 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4151 		   !is_sync_kiocb(iocb)) {
4152 		/*
4153 		 * If the op is asynchronous, we need to copy the list attached
4154 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4155 		 * will be retained by the caller; in any case, we may or may
4156 		 * not be able to pin the pages, so we don't try.
4157 		 */
4158 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4159 		if (!ctx->bv) {
4160 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4161 			return -ENOMEM;
4162 		}
4163 	} else {
4164 		/*
4165 		 * Otherwise, we just pass the iterator down as-is and rely on
4166 		 * the caller to make sure the pages referred to by the
4167 		 * iterator don't evaporate.
4168 		 */
4169 		ctx->iter = *to;
4170 	}
4171 
	/*
	 * Direct reads first write back and wait on the pagecache range being
	 * read; any failure there is reported to the caller as -EAGAIN.
	 */
4172 	if (direct) {
4173 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4174 						  offset, offset + len - 1);
4175 		if (rc) {
4176 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4177 			return -EAGAIN;
4178 		}
4179 	}
4180 
4181 	/* grab a lock here due to read response handlers can access ctx */
4182 	mutex_lock(&ctx->aio_mutex);
4183 
4184 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4185 
4186 	/* if at least one read request send succeeded, then reset rc */
4187 	if (!list_empty(&ctx->list))
4188 		rc = 0;
4189 
4190 	mutex_unlock(&ctx->aio_mutex);
4191 
4192 	if (rc) {
4193 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4194 		return rc;
4195 	}
4196 
	/*
	 * Asynchronous caller: drop this function's reference and return.
	 * NOTE(review): the result is presumably delivered later through
	 * iocb->ki_complete by the completion path - confirm.
	 */
4197 	if (!is_sync_kiocb(iocb)) {
4198 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4199 		return -EIOCBQUEUED;
4200 	}
4201 
4202 	rc = wait_for_completion_killable(&ctx->done);
4203 	if (rc) {
		/*
		 * The wait was cut short: record -EINTR in ctx->rc under
		 * aio_mutex, but still pick up whatever ctx->total_len holds.
		 */
4204 		mutex_lock(&ctx->aio_mutex);
4205 		ctx->rc = rc = -EINTR;
4206 		total_read = ctx->total_len;
4207 		mutex_unlock(&ctx->aio_mutex);
4208 	} else {
4209 		rc = ctx->rc;
4210 		total_read = ctx->total_len;
4211 	}
4212 
4213 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4214 
	/* a non-zero byte count takes precedence over the error code */
4215 	if (total_read) {
4216 		iocb->ki_pos += total_read;
4217 		return total_read;
4218 	}
4219 	return rc;
4220 }
4221 
4222 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4223 {
4224 	return __cifs_readv(iocb, to, true);
4225 }
4226 
4227 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4228 {
4229 	return __cifs_readv(iocb, to, false);
4230 }
4231 
4232 ssize_t
4233 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4234 {
4235 	struct inode *inode = file_inode(iocb->ki_filp);
4236 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4237 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4238 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4239 						iocb->ki_filp->private_data;
4240 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4241 	int rc = -EACCES;
4242 
4243 	/*
4244 	 * In strict cache mode we need to read from the server all the time
4245 	 * if we don't have level II oplock because the server can delay mtime
4246 	 * change - so we can't make a decision about inode invalidating.
4247 	 * And we can also fail with pagereading if there are mandatory locks
4248 	 * on pages affected by this read but not on the region from pos to
4249 	 * pos+len-1.
4250 	 */
4251 	if (!CIFS_CACHE_READ(cinode))
4252 		return cifs_user_readv(iocb, to);
4253 
4254 	if (cap_unix(tcon->ses) &&
4255 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4256 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4257 		return generic_file_read_iter(iocb, to);
4258 
4259 	/*
4260 	 * We need to hold the sem to be sure nobody modifies lock list
4261 	 * with a brlock that prevents reading.
4262 	 */
4263 	down_read(&cinode->lock_sem);
4264 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4265 				     tcon->ses->server->vals->shared_lock_type,
4266 				     0, NULL, CIFS_READ_OP))
4267 		rc = generic_file_read_iter(iocb, to);
4268 	up_read(&cinode->lock_sem);
4269 	return rc;
4270 }
4271 
4272 static ssize_t
4273 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4274 {
4275 	int rc = -EACCES;
4276 	unsigned int bytes_read = 0;
4277 	unsigned int total_read;
4278 	unsigned int current_read_size;
4279 	unsigned int rsize;
4280 	struct cifs_sb_info *cifs_sb;
4281 	struct cifs_tcon *tcon;
4282 	struct TCP_Server_Info *server;
4283 	unsigned int xid;
4284 	char *cur_offset;
4285 	struct cifsFileInfo *open_file;
4286 	struct cifs_io_parms io_parms = {0};
4287 	int buf_type = CIFS_NO_BUFFER;
4288 	__u32 pid;
4289 
4290 	xid = get_xid();
4291 	cifs_sb = CIFS_FILE_SB(file);
4292 
4293 	/* FIXME: set up handlers for larger reads and/or convert to async */
4294 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4295 
4296 	if (file->private_data == NULL) {
4297 		rc = -EBADF;
4298 		free_xid(xid);
4299 		return rc;
4300 	}
4301 	open_file = file->private_data;
4302 	tcon = tlink_tcon(open_file->tlink);
4303 	server = cifs_pick_channel(tcon->ses);
4304 
4305 	if (!server->ops->sync_read) {
4306 		free_xid(xid);
4307 		return -ENOSYS;
4308 	}
4309 
4310 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4311 		pid = open_file->pid;
4312 	else
4313 		pid = current->tgid;
4314 
4315 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4316 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4317 
4318 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4319 	     total_read += bytes_read, cur_offset += bytes_read) {
4320 		do {
4321 			current_read_size = min_t(uint, read_size - total_read,
4322 						  rsize);
4323 			/*
4324 			 * For windows me and 9x we do not want to request more
4325 			 * than it negotiated since it will refuse the read
4326 			 * then.
4327 			 */
4328 			if (!(tcon->ses->capabilities &
4329 				tcon->ses->server->vals->cap_large_files)) {
4330 				current_read_size = min_t(uint,
4331 					current_read_size, CIFSMaxBufSize);
4332 			}
4333 			if (open_file->invalidHandle) {
4334 				rc = cifs_reopen_file(open_file, true);
4335 				if (rc != 0)
4336 					break;
4337 			}
4338 			io_parms.pid = pid;
4339 			io_parms.tcon = tcon;
4340 			io_parms.offset = *offset;
4341 			io_parms.length = current_read_size;
4342 			io_parms.server = server;
4343 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4344 						    &bytes_read, &cur_offset,
4345 						    &buf_type);
4346 		} while (rc == -EAGAIN);
4347 
4348 		if (rc || (bytes_read == 0)) {
4349 			if (total_read) {
4350 				break;
4351 			} else {
4352 				free_xid(xid);
4353 				return rc;
4354 			}
4355 		} else {
4356 			cifs_stats_bytes_read(tcon, total_read);
4357 			*offset += bytes_read;
4358 		}
4359 	}
4360 	free_xid(xid);
4361 	return total_read;
4362 }
4363 
4364 /*
4365  * If the page is mmap'ed into a process' page tables, then we need to make
4366  * sure that it doesn't change while being written back.
4367  */
4368 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4369 {
4370 	struct folio *folio = page_folio(vmf->page);
4371 
4372 	/* Wait for the folio to be written to the cache before we allow it to
4373 	 * be modified.  We then assume the entire folio will need writing back.
4374 	 */
4375 #ifdef CONFIG_CIFS_FSCACHE
4376 	if (folio_test_fscache(folio) &&
4377 	    folio_wait_fscache_killable(folio) < 0)
4378 		return VM_FAULT_RETRY;
4379 #endif
4380 
4381 	folio_wait_writeback(folio);
4382 
4383 	if (folio_lock_killable(folio) < 0)
4384 		return VM_FAULT_RETRY;
4385 	return VM_FAULT_LOCKED;
4386 }
4387 
/*
 * VM operations installed on a vma by cifs_file_mmap() and
 * cifs_file_strict_mmap(): the generic filemap fault/map_pages handlers
 * plus the cifs-specific ->page_mkwrite hook.
 */
4388 static const struct vm_operations_struct cifs_file_vm_ops = {
4389 	.fault = filemap_fault,
4390 	.map_pages = filemap_map_pages,
4391 	.page_mkwrite = cifs_page_mkwrite,
4392 };
4393 
4394 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4395 {
4396 	int xid, rc = 0;
4397 	struct inode *inode = file_inode(file);
4398 
4399 	xid = get_xid();
4400 
4401 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4402 		rc = cifs_zap_mapping(inode);
4403 	if (!rc)
4404 		rc = generic_file_mmap(file, vma);
4405 	if (!rc)
4406 		vma->vm_ops = &cifs_file_vm_ops;
4407 
4408 	free_xid(xid);
4409 	return rc;
4410 }
4411 
4412 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4413 {
4414 	int rc, xid;
4415 
4416 	xid = get_xid();
4417 
4418 	rc = cifs_revalidate_file(file);
4419 	if (rc)
4420 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4421 			 rc);
4422 	if (!rc)
4423 		rc = generic_file_mmap(file, vma);
4424 	if (!rc)
4425 		vma->vm_ops = &cifs_file_vm_ops;
4426 
4427 	free_xid(xid);
4428 	return rc;
4429 }
4430 
4431 /*
4432  * Unlock a bunch of folios in the pagecache.
4433  */
4434 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4435 {
4436 	struct folio *folio;
4437 	XA_STATE(xas, &mapping->i_pages, first);
4438 
4439 	rcu_read_lock();
4440 	xas_for_each(&xas, folio, last) {
4441 		folio_unlock(folio);
4442 	}
4443 	rcu_read_unlock();
4444 }
4445 
4446 static void cifs_readahead_complete(struct work_struct *work)
4447 {
4448 	struct cifs_readdata *rdata = container_of(work,
4449 						   struct cifs_readdata, work);
4450 	struct folio *folio;
4451 	pgoff_t last;
4452 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4453 
4454 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4455 
4456 	if (good)
4457 		cifs_readahead_to_fscache(rdata->mapping->host,
4458 					  rdata->offset, rdata->bytes);
4459 
4460 	if (iov_iter_count(&rdata->iter) > 0)
4461 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4462 
4463 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4464 
4465 	rcu_read_lock();
4466 	xas_for_each(&xas, folio, last) {
4467 		if (good) {
4468 			flush_dcache_folio(folio);
4469 			folio_mark_uptodate(folio);
4470 		}
4471 		folio_unlock(folio);
4472 	}
4473 	rcu_read_unlock();
4474 
4475 	kref_put(&rdata->refcount, cifs_readdata_release);
4476 }
4477 
/*
 * ->readahead() address space operation.
 *
 * Walks the readahead window described by @ractl.  When fscache is usable
 * for the inode, folios that the cache reports as present are read one at a
 * time with cifs_readpage_from_fscache().  Everything else is chopped into
 * requests sized by the credits obtained from wait_mtu_credits() and issued
 * with server->ops->async_readv(), using a cifs_readdata allocated with
 * cifs_readahead_complete() as its completion work function.
 *
 * The loop stops at the first error; nothing is returned to the caller.
 */
4478 static void cifs_readahead(struct readahead_control *ractl)
4479 {
4480 	struct cifsFileInfo *open_file = ractl->file->private_data;
4481 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4482 	struct TCP_Server_Info *server;
4483 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4484 	unsigned int ra_pages;
4485 	pgoff_t next_cached = ULONG_MAX, ra_index;
4486 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4487 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4488 	bool check_cache = caching;
4489 	pid_t pid;
4490 	int rc = 0;
4491 
4492 	/* Note that readahead_count() lags behind our dequeuing of pages from
4493 	 * the ractl, so we have to keep track for ourselves.
4494 	 */
4495 	ra_pages = readahead_count(ractl);
4496 	ra_index = readahead_index(ractl);
4497 
4498 	xid = get_xid();
4499 
4500 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4501 		pid = open_file->pid;
4502 	else
4503 		pid = current->tgid;
4504 
4505 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4506 
4507 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4508 		 __func__, ractl->file, ractl->mapping, ra_pages);
4509 
4510 	/*
4511 	 * Chop the readahead request up into rsize-sized read requests.
4512 	 */
4513 	while ((nr_pages = ra_pages)) {
4514 		unsigned int i, rsize;
4515 		struct cifs_readdata *rdata;
4516 		struct cifs_credits credits_on_stack;
4517 		struct cifs_credits *credits = &credits_on_stack;
4518 		struct folio *folio;
4519 		pgoff_t fsize;
4520 
4521 		/*
4522 		 * Find out if we have anything cached in the range of
4523 		 * interest, and if so, where the next chunk of cached data is.
4524 		 */
4525 		if (caching) {
4526 			if (check_cache) {
4527 				rc = cifs_fscache_query_occupancy(
4528 					ractl->mapping->host, ra_index, nr_pages,
4529 					&next_cached, &cache_nr_pages);
4530 				if (rc < 0)
4531 					caching = false;
4532 				check_cache = false;
4533 			}
4534 
4535 			if (ra_index == next_cached) {
4536 				/*
4537 				 * TODO: Send a whole batch of pages to be read
4538 				 * by the cache.
4539 				 */
4540 				folio = readahead_folio(ractl);
4541 				fsize = folio_nr_pages(folio);
4542 				ra_pages -= fsize;
4543 				ra_index += fsize;
4544 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4545 							       &folio->page) < 0) {
4546 					/*
4547 					 * TODO: Deal with cache read failure
4548 					 * here, but for the moment, delegate
4549 					 * that to readpage.
4550 					 */
4551 					caching = false;
4552 				}
4553 				folio_unlock(folio);
4554 				next_cached += fsize;
4555 				cache_nr_pages -= fsize;
				/* cached run exhausted: query the cache again next time */
4556 				if (cache_nr_pages == 0)
4557 					check_cache = true;
4558 				continue;
4559 			}
4560 		}
4561 
4562 		if (open_file->invalidHandle) {
4563 			rc = cifs_reopen_file(open_file, true);
4564 			if (rc) {
4565 				if (rc == -EAGAIN)
4566 					continue;
4567 				break;
4568 			}
4569 		}
4570 
		/* rsize not set yet: ask the server ops to negotiate one */
4571 		if (cifs_sb->ctx->rsize == 0)
4572 			cifs_sb->ctx->rsize =
4573 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4574 							     cifs_sb->ctx);
4575 
4576 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4577 						   &rsize, credits);
4578 		if (rc)
4579 			break;
		/*
		 * Limit this request to what the granted rsize covers, what is
		 * left of the window, and the start of the next cached run.
		 */
4580 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4581 		if (next_cached != ULONG_MAX)
4582 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4583 
4584 		/*
4585 		 * Give up immediately if rsize is too small to read an entire
4586 		 * page. The VFS will fall back to readpage. We should never
4587 		 * reach this point however since we set ra_pages to 0 when the
4588 		 * rsize is smaller than a cache page.
4589 		 */
4590 		if (unlikely(!nr_pages)) {
4591 			add_credits_and_wake_if(server, credits, 0);
4592 			break;
4593 		}
4594 
4595 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4596 		if (!rdata) {
4597 			/* best to give up if we're out of mem */
4598 			add_credits_and_wake_if(server, credits, 0);
4599 			break;
4600 		}
4601 
4602 		rdata->offset	= ra_index * PAGE_SIZE;
4603 		rdata->bytes	= nr_pages * PAGE_SIZE;
4604 		rdata->cfile	= cifsFileInfo_get(open_file);
4605 		rdata->server	= server;
4606 		rdata->mapping	= ractl->mapping;
4607 		rdata->pid	= pid;
4608 		rdata->credits	= credits_on_stack;
4609 
		/* dequeue the folios of this request from the ractl */
4610 		for (i = 0; i < nr_pages; i++) {
4611 			if (!readahead_folio(ractl))
4612 				WARN_ON(1);
4613 		}
4614 		ra_pages -= nr_pages;
4615 		ra_index += nr_pages;
4616 
		/* the read lands directly in the pagecache folios of the range */
4617 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4618 				rdata->offset, rdata->bytes);
4619 
4620 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4621 		if (!rc) {
4622 			if (rdata->cfile->invalidHandle)
4623 				rc = -EAGAIN;
4624 			else
4625 				rc = server->ops->async_readv(rdata);
4626 		}
4627 
4628 		if (rc) {
4629 			add_credits_and_wake_if(server, &rdata->credits, 0);
4630 			cifs_unlock_folios(rdata->mapping,
4631 					   rdata->offset / PAGE_SIZE,
4632 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4633 			/* Fallback to the readpage in error/reconnect cases */
4634 			kref_put(&rdata->refcount, cifs_readdata_release);
4635 			break;
4636 		}
4637 
		/*
		 * Drop this function's reference on the request.
		 * NOTE(review): presumably the in-flight read holds its own
		 * reference until completion - confirm.
		 */
4638 		kref_put(&rdata->refcount, cifs_readdata_release);
4639 	}
4640 
4641 	free_xid(xid);
4642 }
4643 
4644 /*
4645  * cifs_readpage_worker must be called with the page pinned
4646  */
4647 static int cifs_readpage_worker(struct file *file, struct page *page,
4648 	loff_t *poffset)
4649 {
4650 	char *read_data;
4651 	int rc;
4652 
4653 	/* Is the page cached? */
4654 	rc = cifs_readpage_from_fscache(file_inode(file), page);
4655 	if (rc == 0)
4656 		goto read_complete;
4657 
4658 	read_data = kmap(page);
4659 	/* for reads over a certain size could initiate async read ahead */
4660 
4661 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4662 
4663 	if (rc < 0)
4664 		goto io_error;
4665 	else
4666 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4667 
4668 	/* we do not want atime to be less than mtime, it broke some apps */
4669 	file_inode(file)->i_atime = current_time(file_inode(file));
4670 	if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4671 		file_inode(file)->i_atime = file_inode(file)->i_mtime;
4672 	else
4673 		file_inode(file)->i_atime = current_time(file_inode(file));
4674 
4675 	if (PAGE_SIZE > rc)
4676 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4677 
4678 	flush_dcache_page(page);
4679 	SetPageUptodate(page);
4680 	rc = 0;
4681 
4682 io_error:
4683 	kunmap(page);
4684 
4685 read_complete:
4686 	unlock_page(page);
4687 	return rc;
4688 }
4689 
4690 static int cifs_read_folio(struct file *file, struct folio *folio)
4691 {
4692 	struct page *page = &folio->page;
4693 	loff_t offset = page_file_offset(page);
4694 	int rc = -EACCES;
4695 	unsigned int xid;
4696 
4697 	xid = get_xid();
4698 
4699 	if (file->private_data == NULL) {
4700 		rc = -EBADF;
4701 		free_xid(xid);
4702 		return rc;
4703 	}
4704 
4705 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4706 		 page, (int)offset, (int)offset);
4707 
4708 	rc = cifs_readpage_worker(file, page, &offset);
4709 
4710 	free_xid(xid);
4711 	return rc;
4712 }
4713 
4714 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4715 {
4716 	struct cifsFileInfo *open_file;
4717 
4718 	spin_lock(&cifs_inode->open_file_lock);
4719 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4720 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4721 			spin_unlock(&cifs_inode->open_file_lock);
4722 			return 1;
4723 		}
4724 	}
4725 	spin_unlock(&cifs_inode->open_file_lock);
4726 	return 0;
4727 }
4728 
4729 /* We do not want to update the file size from server for inodes
4730    open for write - to avoid races with writepage extending
4731    the file - in the future we could consider allowing
4732    refreshing the inode only on increases in the file size
4733    but this is tricky to do without racing with writebehind
4734    page caching in the current Linux kernel design */
4735 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4736 {
4737 	if (!cifsInode)
4738 		return true;
4739 
4740 	if (is_inode_writable(cifsInode)) {
4741 		/* This inode is open for write at least once */
4742 		struct cifs_sb_info *cifs_sb;
4743 
4744 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4745 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4746 			/* since no page cache to corrupt on directio
4747 			we can change size safely */
4748 			return true;
4749 		}
4750 
4751 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4752 			return true;
4753 
4754 		return false;
4755 	} else
4756 		return true;
4757 }
4758 
/*
 * ->write_begin() address space operation.
 *
 * Grabs the pagecache page covering @pos and hands it back through @pagep.
 * Nothing more is done if the page is already uptodate or the write covers
 * a whole page.  Otherwise, with a read oplock and no existing data that
 * the write would leave uncovered, the untouched parts of the page are
 * zeroed and the page is flagged PageChecked; failing that, one attempt is
 * made to read the page with cifs_readpage_worker() before retrying.
 *
 * Returns 0, or -ENOMEM if no page could be obtained (*@pagep is then NULL).
 * @fsdata is not used.
 */
4759 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4760 			loff_t pos, unsigned len,
4761 			struct page **pagep, void **fsdata)
4762 {
4763 	int oncethru = 0;
4764 	pgoff_t index = pos >> PAGE_SHIFT;
4765 	loff_t offset = pos & (PAGE_SIZE - 1);
4766 	loff_t page_start = pos & PAGE_MASK;
4767 	loff_t i_size;
4768 	struct page *page;
4769 	int rc = 0;
4770 
4771 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4772 
4773 start:
4774 	page = grab_cache_page_write_begin(mapping, index);
4775 	if (!page) {
4776 		rc = -ENOMEM;
4777 		goto out;
4778 	}
4779 
4780 	if (PageUptodate(page))
4781 		goto out;
4782 
4783 	/*
4784 	 * If we write a full page it will be up to date, no need to read from
4785 	 * the server. If the write is short, we'll end up doing a sync write
4786 	 * instead.
4787 	 */
4788 	if (len == PAGE_SIZE)
4789 		goto out;
4790 
4791 	/*
4792 	 * optimize away the read when we have an oplock, and we're not
4793 	 * expecting to use any of the data we'd be reading in. That
4794 	 * is, when the page lies beyond the EOF, or straddles the EOF
4795 	 * and the write will cover all of the existing data.
4796 	 */
4797 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4798 		i_size = i_size_read(mapping->host);
4799 		if (page_start >= i_size ||
4800 		    (offset == 0 && (pos + len) >= i_size)) {
			/* zero the page before and after the span being written */
4801 			zero_user_segments(page, 0, offset,
4802 					   offset + len,
4803 					   PAGE_SIZE);
4804 			/*
4805 			 * PageChecked means that the parts of the page
4806 			 * to which we're not writing are considered up
4807 			 * to date. Once the data is copied to the
4808 			 * page, it can be set uptodate.
4809 			 */
4810 			SetPageChecked(page);
4811 			goto out;
4812 		}
4813 	}
4814 
4815 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4816 		/*
4817 		 * might as well read a page, it is fast enough. If we get
4818 		 * an error, we don't need to return it. cifs_write_end will
4819 		 * do a sync write instead since PG_uptodate isn't set.
4820 		 */
4821 		cifs_readpage_worker(file, page, &page_start);
		/*
		 * The worker unlocked the page; drop our reference and grab
		 * the page afresh.  oncethru limits this to a single attempt.
		 */
4822 		put_page(page);
4823 		oncethru = 1;
4824 		goto start;
4825 	} else {
4826 		/* we could try using another file handle if there is one -
4827 		   but how would we lock it to prevent close of that handle
4828 		   racing with this read? In any case
4829 		   this will be written out by write_end so is fine */
4830 	}
4831 out:
4832 	*pagep = page;
4833 	return rc;
4834 }
4835 
4836 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4837 {
4838 	if (folio_test_private(folio))
4839 		return 0;
4840 	if (folio_test_fscache(folio)) {
4841 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4842 			return false;
4843 		folio_wait_fscache(folio);
4844 	}
4845 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4846 	return true;
4847 }
4848 
/*
 * ->invalidate_folio() address space operation.  The only work done here
 * is folio_wait_fscache() on @folio; @offset and @length are not used.
 */
4849 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4850 				 size_t length)
4851 {
4852 	folio_wait_fscache(folio);
4853 }
4854 
4855 static int cifs_launder_folio(struct folio *folio)
4856 {
4857 	int rc = 0;
4858 	loff_t range_start = folio_pos(folio);
4859 	loff_t range_end = range_start + folio_size(folio);
4860 	struct writeback_control wbc = {
4861 		.sync_mode = WB_SYNC_ALL,
4862 		.nr_to_write = 0,
4863 		.range_start = range_start,
4864 		.range_end = range_end,
4865 	};
4866 
4867 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4868 
4869 	if (folio_clear_dirty_for_io(folio))
4870 		rc = cifs_writepage_locked(&folio->page, &wbc);
4871 
4872 	folio_wait_fscache(folio);
4873 	return rc;
4874 }
4875 
/*
 * Work handler for an oplock break on an open file (the oplock_break work
 * item embedded in struct cifsFileInfo).
 *
 * Waits for CIFS_INODE_PENDING_WRITERS to clear, lets the server ops
 * downgrade the cached oplock state, and for regular files breaks local
 * leases and starts writeback of the mapping; when read caching is lost or
 * a purge was requested it also waits for the writeback and zaps the
 * mapping.  Unless write caching is still held on a regular file,
 * cifs_push_locks() is called.  Finally the reference on @cfile is dropped
 * and, unless the break was cancelled or the inode has no open files left,
 * the oplock response is sent to the server.
 */
4876 void cifs_oplock_break(struct work_struct *work)
4877 {
4878 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4879 						  oplock_break);
4880 	struct inode *inode = d_inode(cfile->dentry);
4881 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4882 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4883 	struct cifs_tcon *tcon;
4884 	struct TCP_Server_Info *server;
4885 	struct tcon_link *tlink;
4886 	int rc = 0;
4887 	bool purge_cache = false, oplock_break_cancelled;
4888 	__u64 persistent_fid, volatile_fid;
4889 	__u16 net_fid;
4890 
4891 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4892 			TASK_UNINTERRUPTIBLE);
4893 
4894 	tlink = cifs_sb_tlink(cifs_sb);
4895 	if (IS_ERR(tlink))
4896 		goto out;
4897 	tcon = tlink_tcon(tlink);
4898 	server = tcon->ses->server;
4899 
4900 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4901 				      cfile->oplock_epoch, &purge_cache);
4902 
4903 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4904 						cifs_has_mand_locks(cinode)) {
4905 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4906 			 inode);
4907 		cinode->oplock = 0;
4908 	}
4909 
	/*
	 * NOTE(review): inode has already been dereferenced above, so the
	 * NULL test below cannot be what protects those accesses.
	 */
4910 	if (inode && S_ISREG(inode->i_mode)) {
4911 		if (CIFS_CACHE_READ(cinode))
4912 			break_lease(inode, O_RDONLY);
4913 		else
4914 			break_lease(inode, O_WRONLY);
4915 		rc = filemap_fdatawrite(inode->i_mapping);
4916 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4917 			rc = filemap_fdatawait(inode->i_mapping);
4918 			mapping_set_error(inode->i_mapping, rc);
4919 			cifs_zap_mapping(inode);
4920 		}
4921 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		/* write caching still held: skip cifs_push_locks() */
4922 		if (CIFS_CACHE_WRITE(cinode))
4923 			goto oplock_break_ack;
4924 	}
4925 
4926 	rc = cifs_push_locks(cfile);
4927 	if (rc)
4928 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4929 
4930 oplock_break_ack:
4931 	/*
4932 	 * When oplock break is received and there are no active
4933 	 * file handles but cached, then schedule deferred close immediately.
4934 	 * So, new open will not use cached handle.
4935 	 */
4936 
4937 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
4938 		cifs_close_deferred_file(cinode);
4939 
	/*
	 * Copy everything still needed out of cfile before the reference is
	 * dropped below; cfile is not touched after the put.
	 */
4940 	persistent_fid = cfile->fid.persistent_fid;
4941 	volatile_fid = cfile->fid.volatile_fid;
4942 	net_fid = cfile->fid.netfid;
4943 	oplock_break_cancelled = cfile->oplock_break_cancelled;
4944 
4945 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4946 	/*
4947 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
4948 	 * an acknowledgment to be sent when the file has already been closed.
4949 	 */
4950 	spin_lock(&cinode->open_file_lock);
4951 	/* check list empty since can race with kill_sb calling tree disconnect */
4952 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
		/* the lock is released before the response is sent */
4953 		spin_unlock(&cinode->open_file_lock);
4954 		rc = server->ops->oplock_response(tcon, persistent_fid,
4955 						  volatile_fid, net_fid, cinode);
4956 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4957 	} else
4958 		spin_unlock(&cinode->open_file_lock);
4959 
4960 	cifs_put_tlink(tlink);
4961 out:
4962 	cifs_done_oplock_break(cinode);
4963 }
4964 
4965 /*
4966  * The presence of cifs_direct_io() in the address space ops vector
4967  * allows open() O_DIRECT flags which would have failed otherwise.
4968  *
4969  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4970  * so this method should never be called.
4971  *
4972  * Direct IO is not yet supported in the cached mode.
 *
 * The function itself ignores @iocb and @iter and always returns -EINVAL.
4973  */
4974 static ssize_t
4975 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4976 {
4977         /*
4978          * FIXME
4979          * Eventually need to support direct IO for non forcedirectio mounts
4980          */
4981         return -EINVAL;
4982 }
4983 
4984 static int cifs_swap_activate(struct swap_info_struct *sis,
4985 			      struct file *swap_file, sector_t *span)
4986 {
4987 	struct cifsFileInfo *cfile = swap_file->private_data;
4988 	struct inode *inode = swap_file->f_mapping->host;
4989 	unsigned long blocks;
4990 	long long isize;
4991 
4992 	cifs_dbg(FYI, "swap activate\n");
4993 
4994 	if (!swap_file->f_mapping->a_ops->swap_rw)
4995 		/* Cannot support swap */
4996 		return -EINVAL;
4997 
4998 	spin_lock(&inode->i_lock);
4999 	blocks = inode->i_blocks;
5000 	isize = inode->i_size;
5001 	spin_unlock(&inode->i_lock);
5002 	if (blocks*512 < isize) {
5003 		pr_warn("swap activate: swapfile has holes\n");
5004 		return -EINVAL;
5005 	}
5006 	*span = sis->pages;
5007 
5008 	pr_warn_once("Swap support over SMB3 is experimental\n");
5009 
5010 	/*
5011 	 * TODO: consider adding ACL (or documenting how) to prevent other
5012 	 * users (on this or other systems) from reading it
5013 	 */
5014 
5015 
5016 	/* TODO: add sk_set_memalloc(inet) or similar */
5017 
5018 	if (cfile)
5019 		cfile->swapfile = true;
5020 	/*
5021 	 * TODO: Since file already open, we can't open with DENY_ALL here
5022 	 * but we could add call to grab a byte range lock to prevent others
5023 	 * from reading or writing the file
5024 	 */
5025 
5026 	sis->flags |= SWP_FS_OPS;
5027 	return add_swap_extent(sis, 0, sis->max, 0);
5028 }
5029 
5030 static void cifs_swap_deactivate(struct file *file)
5031 {
5032 	struct cifsFileInfo *cfile = file->private_data;
5033 
5034 	cifs_dbg(FYI, "swap deactivate\n");
5035 
5036 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5037 
5038 	if (cfile)
5039 		cfile->swapfile = false;
5040 
5041 	/* do we need to unpin (or unlock) the file */
5042 }
5043 
5044 /*
5045  * Mark a page as having been made dirty and thus needing writeback.  We also
5046  * need to pin the cache object to write back to.
 *
 * With CONFIG_CIFS_FSCACHE this forwards to fscache_dirty_folio() with the
 * cookie of the inode that owns @mapping and returns its result.
5047  */
5048 #ifdef CONFIG_CIFS_FSCACHE
5049 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5050 {
5051 	return fscache_dirty_folio(mapping, folio,
5052 					cifs_inode_cookie(mapping->host));
5053 }
5054 #else
/* Without fscache support the generic filemap helper is used directly. */
5055 #define cifs_dirty_folio filemap_dirty_folio
5056 #endif
5057 
/*
 * Full set of cifs address space operations: single-folio read and
 * readahead, writeback, buffered write begin/end, the dirty/release/
 * invalidate/launder folio hooks, a ->direct_IO entry (cifs_direct_io) and
 * swap activation/deactivation.
 */
5058 const struct address_space_operations cifs_addr_ops = {
5059 	.read_folio = cifs_read_folio,
5060 	.readahead = cifs_readahead,
5061 	.writepages = cifs_writepages,
5062 	.write_begin = cifs_write_begin,
5063 	.write_end = cifs_write_end,
5064 	.dirty_folio = cifs_dirty_folio,
5065 	.release_folio = cifs_release_folio,
5066 	.direct_IO = cifs_direct_io,
5067 	.invalidate_folio = cifs_invalidate_folio,
5068 	.launder_folio = cifs_launder_folio,
5069 	.migrate_folio = filemap_migrate_folio,
5070 	/*
5071 	 * TODO: investigate and if useful we could add an is_dirty_writeback
5072 	 * helper if needed
5073 	 */
5074 	.swap_activate = cifs_swap_activate,
5075 	.swap_deactivate = cifs_swap_deactivate,
5076 };
5077 
5078 /*
5079  * cifs_readahead requires the server to support a buffer large enough to
5080  * contain the header plus one complete page of data.  Otherwise, we need
5081  * to leave cifs_readahead out of the address space operations.
 *
 * Besides .readahead, this table also has no .direct_IO, .swap_activate or
 * .swap_deactivate entries; the remaining hooks match cifs_addr_ops.
5082  */
5083 const struct address_space_operations cifs_addr_ops_smallbuf = {
5084 	.read_folio = cifs_read_folio,
5085 	.writepages = cifs_writepages,
5086 	.write_begin = cifs_write_begin,
5087 	.write_end = cifs_write_end,
5088 	.dirty_folio = cifs_dirty_folio,
5089 	.release_folio = cifs_release_folio,
5090 	.invalidate_folio = cifs_invalidate_folio,
5091 	.launder_folio = cifs_launder_folio,
5092 	.migrate_folio = filemap_migrate_folio,
5093 };
5094