xref: /linux/fs/smb/client/file.c (revision f82811e22b480a203a438d8e1f29af9c93ccbb0c)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark as invalid, all open files on tree connections since they
167  * were closed when session to server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->status != TID_NEED_RECON) {
179 		spin_unlock(&tcon->tc_lock);
180 		return;
181 	}
182 	tcon->status = TID_IN_FILES_INVALIDATE;
183 	spin_unlock(&tcon->tc_lock);
184 
185 	/* list all files open on tree connection and mark them invalid */
186 	spin_lock(&tcon->open_file_lock);
187 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
188 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
189 		open_file->invalidHandle = true;
190 		open_file->oplock_break_cancelled = true;
191 	}
192 	spin_unlock(&tcon->open_file_lock);
193 
194 	invalidate_all_cached_dirs(tcon);
195 	spin_lock(&tcon->tc_lock);
196 	if (tcon->status == TID_IN_FILES_INVALIDATE)
197 		tcon->status = TID_NEED_TCON;
198 	spin_unlock(&tcon->tc_lock);
199 
200 	/*
201 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
202 	 * to this tcon.
203 	 */
204 }
205 
206 static inline int cifs_convert_flags(unsigned int flags)
207 {
208 	if ((flags & O_ACCMODE) == O_RDONLY)
209 		return GENERIC_READ;
210 	else if ((flags & O_ACCMODE) == O_WRONLY)
211 		return GENERIC_WRITE;
212 	else if ((flags & O_ACCMODE) == O_RDWR) {
213 		/* GENERIC_ALL is too much permission to request
214 		   can cause unnecessary access denied on create */
215 		/* return GENERIC_ALL; */
216 		return (GENERIC_READ | GENERIC_WRITE);
217 	}
218 
219 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
220 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
221 		FILE_READ_DATA);
222 }
223 
224 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
225 static u32 cifs_posix_convert_flags(unsigned int flags)
226 {
227 	u32 posix_flags = 0;
228 
229 	if ((flags & O_ACCMODE) == O_RDONLY)
230 		posix_flags = SMB_O_RDONLY;
231 	else if ((flags & O_ACCMODE) == O_WRONLY)
232 		posix_flags = SMB_O_WRONLY;
233 	else if ((flags & O_ACCMODE) == O_RDWR)
234 		posix_flags = SMB_O_RDWR;
235 
236 	if (flags & O_CREAT) {
237 		posix_flags |= SMB_O_CREAT;
238 		if (flags & O_EXCL)
239 			posix_flags |= SMB_O_EXCL;
240 	} else if (flags & O_EXCL)
241 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
242 			 current->comm, current->tgid);
243 
244 	if (flags & O_TRUNC)
245 		posix_flags |= SMB_O_TRUNC;
246 	/* be safe and imply O_SYNC for O_DSYNC */
247 	if (flags & O_DSYNC)
248 		posix_flags |= SMB_O_SYNC;
249 	if (flags & O_DIRECTORY)
250 		posix_flags |= SMB_O_DIRECTORY;
251 	if (flags & O_NOFOLLOW)
252 		posix_flags |= SMB_O_NOFOLLOW;
253 	if (flags & O_DIRECT)
254 		posix_flags |= SMB_O_DIRECT;
255 
256 	return posix_flags;
257 }
258 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
259 
260 static inline int cifs_get_disposition(unsigned int flags)
261 {
262 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
263 		return FILE_CREATE;
264 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
265 		return FILE_OVERWRITE_IF;
266 	else if ((flags & O_CREAT) == O_CREAT)
267 		return FILE_OPEN_IF;
268 	else if ((flags & O_TRUNC) == O_TRUNC)
269 		return FILE_OVERWRITE;
270 	else
271 		return FILE_OPEN;
272 }
273 
274 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
275 int cifs_posix_open(const char *full_path, struct inode **pinode,
276 			struct super_block *sb, int mode, unsigned int f_flags,
277 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
278 {
279 	int rc;
280 	FILE_UNIX_BASIC_INFO *presp_data;
281 	__u32 posix_flags = 0;
282 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
283 	struct cifs_fattr fattr;
284 	struct tcon_link *tlink;
285 	struct cifs_tcon *tcon;
286 
287 	cifs_dbg(FYI, "posix open %s\n", full_path);
288 
289 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
290 	if (presp_data == NULL)
291 		return -ENOMEM;
292 
293 	tlink = cifs_sb_tlink(cifs_sb);
294 	if (IS_ERR(tlink)) {
295 		rc = PTR_ERR(tlink);
296 		goto posix_open_ret;
297 	}
298 
299 	tcon = tlink_tcon(tlink);
300 	mode &= ~current_umask();
301 
302 	posix_flags = cifs_posix_convert_flags(f_flags);
303 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
304 			     poplock, full_path, cifs_sb->local_nls,
305 			     cifs_remap(cifs_sb));
306 	cifs_put_tlink(tlink);
307 
308 	if (rc)
309 		goto posix_open_ret;
310 
311 	if (presp_data->Type == cpu_to_le32(-1))
312 		goto posix_open_ret; /* open ok, caller does qpathinfo */
313 
314 	if (!pinode)
315 		goto posix_open_ret; /* caller does not need info */
316 
317 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
318 
319 	/* get new inode and set it up */
320 	if (*pinode == NULL) {
321 		cifs_fill_uniqueid(sb, &fattr);
322 		*pinode = cifs_iget(sb, &fattr);
323 		if (!*pinode) {
324 			rc = -ENOMEM;
325 			goto posix_open_ret;
326 		}
327 	} else {
328 		cifs_revalidate_mapping(*pinode);
329 		rc = cifs_fattr_to_inode(*pinode, &fattr);
330 	}
331 
332 posix_open_ret:
333 	kfree(presp_data);
334 	return rc;
335 }
336 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
337 
338 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
339 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
340 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
341 {
342 	int rc;
343 	int desired_access;
344 	int disposition;
345 	int create_options = CREATE_NOT_DIR;
346 	struct TCP_Server_Info *server = tcon->ses->server;
347 	struct cifs_open_parms oparms;
348 
349 	if (!server->ops->open)
350 		return -ENOSYS;
351 
352 	desired_access = cifs_convert_flags(f_flags);
353 
354 /*********************************************************************
355  *  open flag mapping table:
356  *
357  *	POSIX Flag            CIFS Disposition
358  *	----------            ----------------
359  *	O_CREAT               FILE_OPEN_IF
360  *	O_CREAT | O_EXCL      FILE_CREATE
361  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
362  *	O_TRUNC               FILE_OVERWRITE
363  *	none of the above     FILE_OPEN
364  *
365  *	Note that there is not a direct match between disposition
366  *	FILE_SUPERSEDE (ie create whether or not file exists although
367  *	O_CREAT | O_TRUNC is similar but truncates the existing
368  *	file rather than creating a new file as FILE_SUPERSEDE does
369  *	(which uses the attributes / metadata passed in on open call)
370  *?
371  *?  O_SYNC is a reasonable match to CIFS writethrough flag
372  *?  and the read write flags match reasonably.  O_LARGEFILE
373  *?  is irrelevant because largefile support is always used
374  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
375  *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
376  *********************************************************************/
377 
378 	disposition = cifs_get_disposition(f_flags);
379 
380 	/* BB pass O_SYNC flag through on file attributes .. BB */
381 
382 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
383 	if (f_flags & O_SYNC)
384 		create_options |= CREATE_WRITE_THROUGH;
385 
386 	if (f_flags & O_DIRECT)
387 		create_options |= CREATE_NO_BUFFER;
388 
389 	oparms = (struct cifs_open_parms) {
390 		.tcon = tcon,
391 		.cifs_sb = cifs_sb,
392 		.desired_access = desired_access,
393 		.create_options = cifs_create_options(cifs_sb, create_options),
394 		.disposition = disposition,
395 		.path = full_path,
396 		.fid = fid,
397 	};
398 
399 	rc = server->ops->open(xid, &oparms, oplock, buf);
400 	if (rc)
401 		return rc;
402 
403 	/* TODO: Add support for calling posix query info but with passing in fid */
404 	if (tcon->unix_ext)
405 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
406 					      xid);
407 	else
408 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
409 					 xid, fid);
410 
411 	if (rc) {
412 		server->ops->close(xid, tcon, fid);
413 		if (rc == -ESTALE)
414 			rc = -EOPENSTALE;
415 	}
416 
417 	return rc;
418 }
419 
420 static bool
421 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
422 {
423 	struct cifs_fid_locks *cur;
424 	bool has_locks = false;
425 
426 	down_read(&cinode->lock_sem);
427 	list_for_each_entry(cur, &cinode->llist, llist) {
428 		if (!list_empty(&cur->locks)) {
429 			has_locks = true;
430 			break;
431 		}
432 	}
433 	up_read(&cinode->lock_sem);
434 	return has_locks;
435 }
436 
/*
 * Take @sem for writing by polling: retry down_write_trylock() every 10ms
 * until it succeeds.
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
443 
444 static void cifsFileInfo_put_work(struct work_struct *work);
445 
446 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
447 				       struct tcon_link *tlink, __u32 oplock,
448 				       const char *symlink_target)
449 {
450 	struct dentry *dentry = file_dentry(file);
451 	struct inode *inode = d_inode(dentry);
452 	struct cifsInodeInfo *cinode = CIFS_I(inode);
453 	struct cifsFileInfo *cfile;
454 	struct cifs_fid_locks *fdlocks;
455 	struct cifs_tcon *tcon = tlink_tcon(tlink);
456 	struct TCP_Server_Info *server = tcon->ses->server;
457 
458 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
459 	if (cfile == NULL)
460 		return cfile;
461 
462 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
463 	if (!fdlocks) {
464 		kfree(cfile);
465 		return NULL;
466 	}
467 
468 	if (symlink_target) {
469 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
470 		if (!cfile->symlink_target) {
471 			kfree(fdlocks);
472 			kfree(cfile);
473 			return NULL;
474 		}
475 	}
476 
477 	INIT_LIST_HEAD(&fdlocks->locks);
478 	fdlocks->cfile = cfile;
479 	cfile->llist = fdlocks;
480 
481 	cfile->count = 1;
482 	cfile->pid = current->tgid;
483 	cfile->uid = current_fsuid();
484 	cfile->dentry = dget(dentry);
485 	cfile->f_flags = file->f_flags;
486 	cfile->invalidHandle = false;
487 	cfile->deferred_close_scheduled = false;
488 	cfile->tlink = cifs_get_tlink(tlink);
489 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
490 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
491 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
492 	mutex_init(&cfile->fh_mutex);
493 	spin_lock_init(&cfile->file_info_lock);
494 
495 	cifs_sb_active(inode->i_sb);
496 
497 	/*
498 	 * If the server returned a read oplock and we have mandatory brlocks,
499 	 * set oplock level to None.
500 	 */
501 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
502 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
503 		oplock = 0;
504 	}
505 
506 	cifs_down_write(&cinode->lock_sem);
507 	list_add(&fdlocks->llist, &cinode->llist);
508 	up_write(&cinode->lock_sem);
509 
510 	spin_lock(&tcon->open_file_lock);
511 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
512 		oplock = fid->pending_open->oplock;
513 	list_del(&fid->pending_open->olist);
514 
515 	fid->purge_cache = false;
516 	server->ops->set_fid(cfile, fid, oplock);
517 
518 	list_add(&cfile->tlist, &tcon->openFileList);
519 	atomic_inc(&tcon->num_local_opens);
520 
521 	/* if readable file instance put first in list*/
522 	spin_lock(&cinode->open_file_lock);
523 	if (file->f_mode & FMODE_READ)
524 		list_add(&cfile->flist, &cinode->openFileList);
525 	else
526 		list_add_tail(&cfile->flist, &cinode->openFileList);
527 	spin_unlock(&cinode->open_file_lock);
528 	spin_unlock(&tcon->open_file_lock);
529 
530 	if (fid->purge_cache)
531 		cifs_zap_mapping(inode);
532 
533 	file->private_data = cfile;
534 	return cfile;
535 }
536 
537 struct cifsFileInfo *
538 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
539 {
540 	spin_lock(&cifs_file->file_info_lock);
541 	cifsFileInfo_get_locked(cifs_file);
542 	spin_unlock(&cifs_file->file_info_lock);
543 	return cifs_file;
544 }
545 
546 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
547 {
548 	struct inode *inode = d_inode(cifs_file->dentry);
549 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
550 	struct cifsLockInfo *li, *tmp;
551 	struct super_block *sb = inode->i_sb;
552 
553 	/*
554 	 * Delete any outstanding lock records. We'll lose them when the file
555 	 * is closed anyway.
556 	 */
557 	cifs_down_write(&cifsi->lock_sem);
558 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
559 		list_del(&li->llist);
560 		cifs_del_lock_waiters(li);
561 		kfree(li);
562 	}
563 	list_del(&cifs_file->llist->llist);
564 	kfree(cifs_file->llist);
565 	up_write(&cifsi->lock_sem);
566 
567 	cifs_put_tlink(cifs_file->tlink);
568 	dput(cifs_file->dentry);
569 	cifs_sb_deactive(sb);
570 	kfree(cifs_file->symlink_target);
571 	kfree(cifs_file);
572 }
573 
574 static void cifsFileInfo_put_work(struct work_struct *work)
575 {
576 	struct cifsFileInfo *cifs_file = container_of(work,
577 			struct cifsFileInfo, put);
578 
579 	cifsFileInfo_put_final(cifs_file);
580 }
581 
582 /**
583  * cifsFileInfo_put - release a reference of file priv data
584  *
585  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
586  *
587  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
588  */
589 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
590 {
591 	_cifsFileInfo_put(cifs_file, true, true);
592 }
593 
594 /**
595  * _cifsFileInfo_put - release a reference of file priv data
596  *
597  * This may involve closing the filehandle @cifs_file out on the
598  * server. Must be called without holding tcon->open_file_lock,
599  * cinode->open_file_lock and cifs_file->file_info_lock.
600  *
601  * If @wait_for_oplock_handler is true and we are releasing the last
602  * reference, wait for any running oplock break handler of the file
603  * and cancel any pending one.
604  *
605  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
606  * @wait_oplock_handler: must be false if called from oplock_break_handler
607  * @offload:	not offloaded on close and oplock breaks
608  *
609  */
610 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
611 		       bool wait_oplock_handler, bool offload)
612 {
613 	struct inode *inode = d_inode(cifs_file->dentry);
614 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
615 	struct TCP_Server_Info *server = tcon->ses->server;
616 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
617 	struct super_block *sb = inode->i_sb;
618 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
619 	struct cifs_fid fid = {};
620 	struct cifs_pending_open open;
621 	bool oplock_break_cancelled;
622 
623 	spin_lock(&tcon->open_file_lock);
624 	spin_lock(&cifsi->open_file_lock);
625 	spin_lock(&cifs_file->file_info_lock);
626 	if (--cifs_file->count > 0) {
627 		spin_unlock(&cifs_file->file_info_lock);
628 		spin_unlock(&cifsi->open_file_lock);
629 		spin_unlock(&tcon->open_file_lock);
630 		return;
631 	}
632 	spin_unlock(&cifs_file->file_info_lock);
633 
634 	if (server->ops->get_lease_key)
635 		server->ops->get_lease_key(inode, &fid);
636 
637 	/* store open in pending opens to make sure we don't miss lease break */
638 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
639 
640 	/* remove it from the lists */
641 	list_del(&cifs_file->flist);
642 	list_del(&cifs_file->tlist);
643 	atomic_dec(&tcon->num_local_opens);
644 
645 	if (list_empty(&cifsi->openFileList)) {
646 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
647 			 d_inode(cifs_file->dentry));
648 		/*
649 		 * In strict cache mode we need invalidate mapping on the last
650 		 * close  because it may cause a error when we open this file
651 		 * again and get at least level II oplock.
652 		 */
653 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
654 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
655 		cifs_set_oplock_level(cifsi, 0);
656 	}
657 
658 	spin_unlock(&cifsi->open_file_lock);
659 	spin_unlock(&tcon->open_file_lock);
660 
661 	oplock_break_cancelled = wait_oplock_handler ?
662 		cancel_work_sync(&cifs_file->oplock_break) : false;
663 
664 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
665 		struct TCP_Server_Info *server = tcon->ses->server;
666 		unsigned int xid;
667 
668 		xid = get_xid();
669 		if (server->ops->close_getattr)
670 			server->ops->close_getattr(xid, tcon, cifs_file);
671 		else if (server->ops->close)
672 			server->ops->close(xid, tcon, &cifs_file->fid);
673 		_free_xid(xid);
674 	}
675 
676 	if (oplock_break_cancelled)
677 		cifs_done_oplock_break(cifsi);
678 
679 	cifs_del_pending_open(&open);
680 
681 	if (offload)
682 		queue_work(fileinfo_put_wq, &cifs_file->put);
683 	else
684 		cifsFileInfo_put_final(cifs_file);
685 }
686 
687 int cifs_open(struct inode *inode, struct file *file)
688 
689 {
690 	int rc = -EACCES;
691 	unsigned int xid;
692 	__u32 oplock;
693 	struct cifs_sb_info *cifs_sb;
694 	struct TCP_Server_Info *server;
695 	struct cifs_tcon *tcon;
696 	struct tcon_link *tlink;
697 	struct cifsFileInfo *cfile = NULL;
698 	void *page;
699 	const char *full_path;
700 	bool posix_open_ok = false;
701 	struct cifs_fid fid = {};
702 	struct cifs_pending_open open;
703 	struct cifs_open_info_data data = {};
704 
705 	xid = get_xid();
706 
707 	cifs_sb = CIFS_SB(inode->i_sb);
708 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
709 		free_xid(xid);
710 		return -EIO;
711 	}
712 
713 	tlink = cifs_sb_tlink(cifs_sb);
714 	if (IS_ERR(tlink)) {
715 		free_xid(xid);
716 		return PTR_ERR(tlink);
717 	}
718 	tcon = tlink_tcon(tlink);
719 	server = tcon->ses->server;
720 
721 	page = alloc_dentry_path();
722 	full_path = build_path_from_dentry(file_dentry(file), page);
723 	if (IS_ERR(full_path)) {
724 		rc = PTR_ERR(full_path);
725 		goto out;
726 	}
727 
728 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
729 		 inode, file->f_flags, full_path);
730 
731 	if (file->f_flags & O_DIRECT &&
732 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
733 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
734 			file->f_op = &cifs_file_direct_nobrl_ops;
735 		else
736 			file->f_op = &cifs_file_direct_ops;
737 	}
738 
739 	/* Get the cached handle as SMB2 close is deferred */
740 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
741 	if (rc == 0) {
742 		if (file->f_flags == cfile->f_flags) {
743 			file->private_data = cfile;
744 			spin_lock(&CIFS_I(inode)->deferred_lock);
745 			cifs_del_deferred_close(cfile);
746 			spin_unlock(&CIFS_I(inode)->deferred_lock);
747 			goto use_cache;
748 		} else {
749 			_cifsFileInfo_put(cfile, true, false);
750 		}
751 	}
752 
753 	if (server->oplocks)
754 		oplock = REQ_OPLOCK;
755 	else
756 		oplock = 0;
757 
758 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
759 	if (!tcon->broken_posix_open && tcon->unix_ext &&
760 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
761 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
762 		/* can not refresh inode info since size could be stale */
763 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
764 				cifs_sb->ctx->file_mode /* ignored */,
765 				file->f_flags, &oplock, &fid.netfid, xid);
766 		if (rc == 0) {
767 			cifs_dbg(FYI, "posix open succeeded\n");
768 			posix_open_ok = true;
769 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
770 			if (tcon->ses->serverNOS)
771 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
772 					 tcon->ses->ip_addr,
773 					 tcon->ses->serverNOS);
774 			tcon->broken_posix_open = true;
775 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
776 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
777 			goto out;
778 		/*
779 		 * Else fallthrough to retry open the old way on network i/o
780 		 * or DFS errors.
781 		 */
782 	}
783 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
784 
785 	if (server->ops->get_lease_key)
786 		server->ops->get_lease_key(inode, &fid);
787 
788 	cifs_add_pending_open(&fid, tlink, &open);
789 
790 	if (!posix_open_ok) {
791 		if (server->ops->get_lease_key)
792 			server->ops->get_lease_key(inode, &fid);
793 
794 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
795 				  xid, &data);
796 		if (rc) {
797 			cifs_del_pending_open(&open);
798 			goto out;
799 		}
800 	}
801 
802 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
803 	if (cfile == NULL) {
804 		if (server->ops->close)
805 			server->ops->close(xid, tcon, &fid);
806 		cifs_del_pending_open(&open);
807 		rc = -ENOMEM;
808 		goto out;
809 	}
810 
811 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
812 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
813 		/*
814 		 * Time to set mode which we can not set earlier due to
815 		 * problems creating new read-only files.
816 		 */
817 		struct cifs_unix_set_info_args args = {
818 			.mode	= inode->i_mode,
819 			.uid	= INVALID_UID, /* no change */
820 			.gid	= INVALID_GID, /* no change */
821 			.ctime	= NO_CHANGE_64,
822 			.atime	= NO_CHANGE_64,
823 			.mtime	= NO_CHANGE_64,
824 			.device	= 0,
825 		};
826 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
827 				       cfile->pid);
828 	}
829 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
830 
831 use_cache:
832 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
833 			   file->f_mode & FMODE_WRITE);
834 	if (file->f_flags & O_DIRECT &&
835 	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
836 	     file->f_flags & O_APPEND))
837 		cifs_invalidate_cache(file_inode(file),
838 				      FSCACHE_INVAL_DIO_WRITE);
839 
840 out:
841 	free_dentry_path(page);
842 	free_xid(xid);
843 	cifs_put_tlink(tlink);
844 	cifs_free_open_info(&data);
845 	return rc;
846 }
847 
848 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
849 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
850 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
851 
852 /*
853  * Try to reacquire byte range locks that were released when session
854  * to server was lost.
855  */
856 static int
857 cifs_relock_file(struct cifsFileInfo *cfile)
858 {
859 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
860 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
861 	int rc = 0;
862 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
863 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
864 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
865 
866 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
867 	if (cinode->can_cache_brlcks) {
868 		/* can cache locks - no need to relock */
869 		up_read(&cinode->lock_sem);
870 		return rc;
871 	}
872 
873 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
874 	if (cap_unix(tcon->ses) &&
875 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
876 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
877 		rc = cifs_push_posix_locks(cfile);
878 	else
879 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
880 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
881 
882 	up_read(&cinode->lock_sem);
883 	return rc;
884 }
885 
886 static int
887 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
888 {
889 	int rc = -EACCES;
890 	unsigned int xid;
891 	__u32 oplock;
892 	struct cifs_sb_info *cifs_sb;
893 	struct cifs_tcon *tcon;
894 	struct TCP_Server_Info *server;
895 	struct cifsInodeInfo *cinode;
896 	struct inode *inode;
897 	void *page;
898 	const char *full_path;
899 	int desired_access;
900 	int disposition = FILE_OPEN;
901 	int create_options = CREATE_NOT_DIR;
902 	struct cifs_open_parms oparms;
903 
904 	xid = get_xid();
905 	mutex_lock(&cfile->fh_mutex);
906 	if (!cfile->invalidHandle) {
907 		mutex_unlock(&cfile->fh_mutex);
908 		free_xid(xid);
909 		return 0;
910 	}
911 
912 	inode = d_inode(cfile->dentry);
913 	cifs_sb = CIFS_SB(inode->i_sb);
914 	tcon = tlink_tcon(cfile->tlink);
915 	server = tcon->ses->server;
916 
917 	/*
918 	 * Can not grab rename sem here because various ops, including those
919 	 * that already have the rename sem can end up causing writepage to get
920 	 * called and if the server was down that means we end up here, and we
921 	 * can never tell if the caller already has the rename_sem.
922 	 */
923 	page = alloc_dentry_path();
924 	full_path = build_path_from_dentry(cfile->dentry, page);
925 	if (IS_ERR(full_path)) {
926 		mutex_unlock(&cfile->fh_mutex);
927 		free_dentry_path(page);
928 		free_xid(xid);
929 		return PTR_ERR(full_path);
930 	}
931 
932 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
933 		 inode, cfile->f_flags, full_path);
934 
935 	if (tcon->ses->server->oplocks)
936 		oplock = REQ_OPLOCK;
937 	else
938 		oplock = 0;
939 
940 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
941 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
942 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
943 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
944 		/*
945 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
946 		 * original open. Must mask them off for a reopen.
947 		 */
948 		unsigned int oflags = cfile->f_flags &
949 						~(O_CREAT | O_EXCL | O_TRUNC);
950 
951 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
952 				     cifs_sb->ctx->file_mode /* ignored */,
953 				     oflags, &oplock, &cfile->fid.netfid, xid);
954 		if (rc == 0) {
955 			cifs_dbg(FYI, "posix reopen succeeded\n");
956 			oparms.reconnect = true;
957 			goto reopen_success;
958 		}
959 		/*
960 		 * fallthrough to retry open the old way on errors, especially
961 		 * in the reconnect path it is important to retry hard
962 		 */
963 	}
964 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
965 
966 	desired_access = cifs_convert_flags(cfile->f_flags);
967 
968 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
969 	if (cfile->f_flags & O_SYNC)
970 		create_options |= CREATE_WRITE_THROUGH;
971 
972 	if (cfile->f_flags & O_DIRECT)
973 		create_options |= CREATE_NO_BUFFER;
974 
975 	if (server->ops->get_lease_key)
976 		server->ops->get_lease_key(inode, &cfile->fid);
977 
978 	oparms = (struct cifs_open_parms) {
979 		.tcon = tcon,
980 		.cifs_sb = cifs_sb,
981 		.desired_access = desired_access,
982 		.create_options = cifs_create_options(cifs_sb, create_options),
983 		.disposition = disposition,
984 		.path = full_path,
985 		.fid = &cfile->fid,
986 		.reconnect = true,
987 	};
988 
989 	/*
990 	 * Can not refresh inode by passing in file_info buf to be returned by
991 	 * ops->open and then calling get_inode_info with returned buf since
992 	 * file might have write behind data that needs to be flushed and server
993 	 * version of file size can be stale. If we knew for sure that inode was
994 	 * not dirty locally we could do this.
995 	 */
996 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
997 	if (rc == -ENOENT && oparms.reconnect == false) {
998 		/* durable handle timeout is expired - open the file again */
999 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1000 		/* indicate that we need to relock the file */
1001 		oparms.reconnect = true;
1002 	}
1003 
1004 	if (rc) {
1005 		mutex_unlock(&cfile->fh_mutex);
1006 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1007 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1008 		goto reopen_error_exit;
1009 	}
1010 
1011 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1012 reopen_success:
1013 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1014 	cfile->invalidHandle = false;
1015 	mutex_unlock(&cfile->fh_mutex);
1016 	cinode = CIFS_I(inode);
1017 
1018 	if (can_flush) {
1019 		rc = filemap_write_and_wait(inode->i_mapping);
1020 		if (!is_interrupt_error(rc))
1021 			mapping_set_error(inode->i_mapping, rc);
1022 
1023 		if (tcon->posix_extensions) {
1024 			rc = smb311_posix_get_inode_info(&inode, full_path,
1025 							 NULL, inode->i_sb, xid);
1026 		} else if (tcon->unix_ext) {
1027 			rc = cifs_get_inode_info_unix(&inode, full_path,
1028 						      inode->i_sb, xid);
1029 		} else {
1030 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1031 						 inode->i_sb, xid, NULL);
1032 		}
1033 	}
1034 	/*
1035 	 * Else we are writing out data to server already and could deadlock if
1036 	 * we tried to flush data, and since we do not know if we have data that
1037 	 * would invalidate the current end of file on the server we can not go
1038 	 * to the server to get the new inode info.
1039 	 */
1040 
1041 	/*
1042 	 * If the server returned a read oplock and we have mandatory brlocks,
1043 	 * set oplock level to None.
1044 	 */
1045 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1046 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1047 		oplock = 0;
1048 	}
1049 
1050 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1051 	if (oparms.reconnect)
1052 		cifs_relock_file(cfile);
1053 
1054 reopen_error_exit:
1055 	free_dentry_path(page);
1056 	free_xid(xid);
1057 	return rc;
1058 }
1059 
1060 void smb2_deferred_work_close(struct work_struct *work)
1061 {
1062 	struct cifsFileInfo *cfile = container_of(work,
1063 			struct cifsFileInfo, deferred.work);
1064 
1065 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1066 	cifs_del_deferred_close(cfile);
1067 	cfile->deferred_close_scheduled = false;
1068 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1069 	_cifsFileInfo_put(cfile, true, false);
1070 }
1071 
1072 int cifs_close(struct inode *inode, struct file *file)
1073 {
1074 	struct cifsFileInfo *cfile;
1075 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1076 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1077 	struct cifs_deferred_close *dclose;
1078 
1079 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1080 
1081 	if (file->private_data != NULL) {
1082 		cfile = file->private_data;
1083 		file->private_data = NULL;
1084 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1085 		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
1086 		    && cinode->lease_granted &&
1087 		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
1088 		    dclose) {
1089 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1090 				inode_set_mtime_to_ts(inode,
1091 						      inode_set_ctime_current(inode));
1092 			}
1093 			spin_lock(&cinode->deferred_lock);
1094 			cifs_add_deferred_close(cfile, dclose);
1095 			if (cfile->deferred_close_scheduled &&
1096 			    delayed_work_pending(&cfile->deferred)) {
1097 				/*
1098 				 * If there is no pending work, mod_delayed_work queues new work.
1099 				 * So, Increase the ref count to avoid use-after-free.
1100 				 */
1101 				if (!mod_delayed_work(deferredclose_wq,
1102 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1103 					cifsFileInfo_get(cfile);
1104 			} else {
1105 				/* Deferred close for files */
1106 				queue_delayed_work(deferredclose_wq,
1107 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1108 				cfile->deferred_close_scheduled = true;
1109 				spin_unlock(&cinode->deferred_lock);
1110 				return 0;
1111 			}
1112 			spin_unlock(&cinode->deferred_lock);
1113 			_cifsFileInfo_put(cfile, true, false);
1114 		} else {
1115 			_cifsFileInfo_put(cfile, true, false);
1116 			kfree(dclose);
1117 		}
1118 	}
1119 
1120 	/* return code from the ->release op is always ignored */
1121 	return 0;
1122 }
1123 
1124 void
1125 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1126 {
1127 	struct cifsFileInfo *open_file, *tmp;
1128 	struct list_head tmp_list;
1129 
1130 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1131 		return;
1132 
1133 	tcon->need_reopen_files = false;
1134 
1135 	cifs_dbg(FYI, "Reopen persistent handles\n");
1136 	INIT_LIST_HEAD(&tmp_list);
1137 
1138 	/* list all files open on tree connection, reopen resilient handles  */
1139 	spin_lock(&tcon->open_file_lock);
1140 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1141 		if (!open_file->invalidHandle)
1142 			continue;
1143 		cifsFileInfo_get(open_file);
1144 		list_add_tail(&open_file->rlist, &tmp_list);
1145 	}
1146 	spin_unlock(&tcon->open_file_lock);
1147 
1148 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1149 		if (cifs_reopen_file(open_file, false /* do not flush */))
1150 			tcon->need_reopen_files = true;
1151 		list_del_init(&open_file->rlist);
1152 		cifsFileInfo_put(open_file);
1153 	}
1154 }
1155 
1156 int cifs_closedir(struct inode *inode, struct file *file)
1157 {
1158 	int rc = 0;
1159 	unsigned int xid;
1160 	struct cifsFileInfo *cfile = file->private_data;
1161 	struct cifs_tcon *tcon;
1162 	struct TCP_Server_Info *server;
1163 	char *buf;
1164 
1165 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1166 
1167 	if (cfile == NULL)
1168 		return rc;
1169 
1170 	xid = get_xid();
1171 	tcon = tlink_tcon(cfile->tlink);
1172 	server = tcon->ses->server;
1173 
1174 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1175 	spin_lock(&cfile->file_info_lock);
1176 	if (server->ops->dir_needs_close(cfile)) {
1177 		cfile->invalidHandle = true;
1178 		spin_unlock(&cfile->file_info_lock);
1179 		if (server->ops->close_dir)
1180 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1181 		else
1182 			rc = -ENOSYS;
1183 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1184 		/* not much we can do if it fails anyway, ignore rc */
1185 		rc = 0;
1186 	} else
1187 		spin_unlock(&cfile->file_info_lock);
1188 
1189 	buf = cfile->srch_inf.ntwrk_buf_start;
1190 	if (buf) {
1191 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1192 		cfile->srch_inf.ntwrk_buf_start = NULL;
1193 		if (cfile->srch_inf.smallBuf)
1194 			cifs_small_buf_release(buf);
1195 		else
1196 			cifs_buf_release(buf);
1197 	}
1198 
1199 	cifs_put_tlink(cfile->tlink);
1200 	kfree(file->private_data);
1201 	file->private_data = NULL;
1202 	/* BB can we lock the filestruct while this is going on? */
1203 	free_xid(xid);
1204 	return rc;
1205 }
1206 
1207 static struct cifsLockInfo *
1208 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1209 {
1210 	struct cifsLockInfo *lock =
1211 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1212 	if (!lock)
1213 		return lock;
1214 	lock->offset = offset;
1215 	lock->length = length;
1216 	lock->type = type;
1217 	lock->pid = current->tgid;
1218 	lock->flags = flags;
1219 	INIT_LIST_HEAD(&lock->blist);
1220 	init_waitqueue_head(&lock->block_q);
1221 	return lock;
1222 }
1223 
1224 void
1225 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1226 {
1227 	struct cifsLockInfo *li, *tmp;
1228 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1229 		list_del_init(&li->blist);
1230 		wake_up(&li->block_q);
1231 	}
1232 }
1233 
1234 #define CIFS_LOCK_OP	0
1235 #define CIFS_READ_OP	1
1236 #define CIFS_WRITE_OP	2
1237 
1238 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1239 static bool
1240 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1241 			    __u64 length, __u8 type, __u16 flags,
1242 			    struct cifsFileInfo *cfile,
1243 			    struct cifsLockInfo **conf_lock, int rw_check)
1244 {
1245 	struct cifsLockInfo *li;
1246 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1247 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1248 
1249 	list_for_each_entry(li, &fdlocks->locks, llist) {
1250 		if (offset + length <= li->offset ||
1251 		    offset >= li->offset + li->length)
1252 			continue;
1253 		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1254 		    server->ops->compare_fids(cfile, cur_cfile)) {
1255 			/* shared lock prevents write op through the same fid */
1256 			if (!(li->type & server->vals->shared_lock_type) ||
1257 			    rw_check != CIFS_WRITE_OP)
1258 				continue;
1259 		}
1260 		if ((type & server->vals->shared_lock_type) &&
1261 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1262 		     current->tgid == li->pid) || type == li->type))
1263 			continue;
1264 		if (rw_check == CIFS_LOCK_OP &&
1265 		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1266 		    server->ops->compare_fids(cfile, cur_cfile))
1267 			continue;
1268 		if (conf_lock)
1269 			*conf_lock = li;
1270 		return true;
1271 	}
1272 	return false;
1273 }
1274 
1275 bool
1276 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1277 			__u8 type, __u16 flags,
1278 			struct cifsLockInfo **conf_lock, int rw_check)
1279 {
1280 	bool rc = false;
1281 	struct cifs_fid_locks *cur;
1282 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1283 
1284 	list_for_each_entry(cur, &cinode->llist, llist) {
1285 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1286 						 flags, cfile, conf_lock,
1287 						 rw_check);
1288 		if (rc)
1289 			break;
1290 	}
1291 
1292 	return rc;
1293 }
1294 
1295 /*
1296  * Check if there is another lock that prevents us to set the lock (mandatory
1297  * style). If such a lock exists, update the flock structure with its
1298  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1299  * or leave it the same if we can't. Returns 0 if we don't need to request to
1300  * the server or 1 otherwise.
1301  */
1302 static int
1303 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1304 	       __u8 type, struct file_lock *flock)
1305 {
1306 	int rc = 0;
1307 	struct cifsLockInfo *conf_lock;
1308 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1309 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1310 	bool exist;
1311 
1312 	down_read(&cinode->lock_sem);
1313 
1314 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1315 					flock->fl_flags, &conf_lock,
1316 					CIFS_LOCK_OP);
1317 	if (exist) {
1318 		flock->fl_start = conf_lock->offset;
1319 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1320 		flock->fl_pid = conf_lock->pid;
1321 		if (conf_lock->type & server->vals->shared_lock_type)
1322 			flock->fl_type = F_RDLCK;
1323 		else
1324 			flock->fl_type = F_WRLCK;
1325 	} else if (!cinode->can_cache_brlcks)
1326 		rc = 1;
1327 	else
1328 		flock->fl_type = F_UNLCK;
1329 
1330 	up_read(&cinode->lock_sem);
1331 	return rc;
1332 }
1333 
1334 static void
1335 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1336 {
1337 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1338 	cifs_down_write(&cinode->lock_sem);
1339 	list_add_tail(&lock->llist, &cfile->llist->locks);
1340 	up_write(&cinode->lock_sem);
1341 }
1342 
1343 /*
1344  * Set the byte-range lock (mandatory style). Returns:
1345  * 1) 0, if we set the lock and don't need to request to the server;
1346  * 2) 1, if no locks prevent us but we need to request to the server;
1347  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1348  */
1349 static int
1350 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1351 		 bool wait)
1352 {
1353 	struct cifsLockInfo *conf_lock;
1354 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1355 	bool exist;
1356 	int rc = 0;
1357 
1358 try_again:
1359 	exist = false;
1360 	cifs_down_write(&cinode->lock_sem);
1361 
1362 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1363 					lock->type, lock->flags, &conf_lock,
1364 					CIFS_LOCK_OP);
1365 	if (!exist && cinode->can_cache_brlcks) {
1366 		list_add_tail(&lock->llist, &cfile->llist->locks);
1367 		up_write(&cinode->lock_sem);
1368 		return rc;
1369 	}
1370 
1371 	if (!exist)
1372 		rc = 1;
1373 	else if (!wait)
1374 		rc = -EACCES;
1375 	else {
1376 		list_add_tail(&lock->blist, &conf_lock->blist);
1377 		up_write(&cinode->lock_sem);
1378 		rc = wait_event_interruptible(lock->block_q,
1379 					(lock->blist.prev == &lock->blist) &&
1380 					(lock->blist.next == &lock->blist));
1381 		if (!rc)
1382 			goto try_again;
1383 		cifs_down_write(&cinode->lock_sem);
1384 		list_del_init(&lock->blist);
1385 	}
1386 
1387 	up_write(&cinode->lock_sem);
1388 	return rc;
1389 }
1390 
1391 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1392 /*
1393  * Check if there is another lock that prevents us to set the lock (posix
1394  * style). If such a lock exists, update the flock structure with its
1395  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1396  * or leave it the same if we can't. Returns 0 if we don't need to request to
1397  * the server or 1 otherwise.
1398  */
1399 static int
1400 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1401 {
1402 	int rc = 0;
1403 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1404 	unsigned char saved_type = flock->fl_type;
1405 
1406 	if ((flock->fl_flags & FL_POSIX) == 0)
1407 		return 1;
1408 
1409 	down_read(&cinode->lock_sem);
1410 	posix_test_lock(file, flock);
1411 
1412 	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1413 		flock->fl_type = saved_type;
1414 		rc = 1;
1415 	}
1416 
1417 	up_read(&cinode->lock_sem);
1418 	return rc;
1419 }
1420 
1421 /*
1422  * Set the byte-range lock (posix style). Returns:
1423  * 1) <0, if the error occurs while setting the lock;
1424  * 2) 0, if we set the lock and don't need to request to the server;
1425  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1426  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1427  */
1428 static int
1429 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1430 {
1431 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1432 	int rc = FILE_LOCK_DEFERRED + 1;
1433 
1434 	if ((flock->fl_flags & FL_POSIX) == 0)
1435 		return rc;
1436 
1437 	cifs_down_write(&cinode->lock_sem);
1438 	if (!cinode->can_cache_brlcks) {
1439 		up_write(&cinode->lock_sem);
1440 		return rc;
1441 	}
1442 
1443 	rc = posix_lock_file(file, flock, NULL);
1444 	up_write(&cinode->lock_sem);
1445 	return rc;
1446 }
1447 
1448 int
1449 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1450 {
1451 	unsigned int xid;
1452 	int rc = 0, stored_rc;
1453 	struct cifsLockInfo *li, *tmp;
1454 	struct cifs_tcon *tcon;
1455 	unsigned int num, max_num, max_buf;
1456 	LOCKING_ANDX_RANGE *buf, *cur;
1457 	static const int types[] = {
1458 		LOCKING_ANDX_LARGE_FILES,
1459 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1460 	};
1461 	int i;
1462 
1463 	xid = get_xid();
1464 	tcon = tlink_tcon(cfile->tlink);
1465 
1466 	/*
1467 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1468 	 * and check it before using.
1469 	 */
1470 	max_buf = tcon->ses->server->maxBuf;
1471 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1472 		free_xid(xid);
1473 		return -EINVAL;
1474 	}
1475 
1476 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1477 		     PAGE_SIZE);
1478 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1479 			PAGE_SIZE);
1480 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1481 						sizeof(LOCKING_ANDX_RANGE);
1482 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1483 	if (!buf) {
1484 		free_xid(xid);
1485 		return -ENOMEM;
1486 	}
1487 
1488 	for (i = 0; i < 2; i++) {
1489 		cur = buf;
1490 		num = 0;
1491 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1492 			if (li->type != types[i])
1493 				continue;
1494 			cur->Pid = cpu_to_le16(li->pid);
1495 			cur->LengthLow = cpu_to_le32((u32)li->length);
1496 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1497 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1498 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1499 			if (++num == max_num) {
1500 				stored_rc = cifs_lockv(xid, tcon,
1501 						       cfile->fid.netfid,
1502 						       (__u8)li->type, 0, num,
1503 						       buf);
1504 				if (stored_rc)
1505 					rc = stored_rc;
1506 				cur = buf;
1507 				num = 0;
1508 			} else
1509 				cur++;
1510 		}
1511 
1512 		if (num) {
1513 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1514 					       (__u8)types[i], 0, num, buf);
1515 			if (stored_rc)
1516 				rc = stored_rc;
1517 		}
1518 	}
1519 
1520 	kfree(buf);
1521 	free_xid(xid);
1522 	return rc;
1523 }
1524 
1525 static __u32
1526 hash_lockowner(fl_owner_t owner)
1527 {
1528 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1529 }
1530 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1531 
1532 struct lock_to_push {
1533 	struct list_head llist;
1534 	__u64 offset;
1535 	__u64 length;
1536 	__u32 pid;
1537 	__u16 netfid;
1538 	__u8 type;
1539 };
1540 
1541 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1542 static int
1543 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1544 {
1545 	struct inode *inode = d_inode(cfile->dentry);
1546 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1547 	struct file_lock *flock;
1548 	struct file_lock_context *flctx = locks_inode_context(inode);
1549 	unsigned int count = 0, i;
1550 	int rc = 0, xid, type;
1551 	struct list_head locks_to_send, *el;
1552 	struct lock_to_push *lck, *tmp;
1553 	__u64 length;
1554 
1555 	xid = get_xid();
1556 
1557 	if (!flctx)
1558 		goto out;
1559 
1560 	spin_lock(&flctx->flc_lock);
1561 	list_for_each(el, &flctx->flc_posix) {
1562 		count++;
1563 	}
1564 	spin_unlock(&flctx->flc_lock);
1565 
1566 	INIT_LIST_HEAD(&locks_to_send);
1567 
1568 	/*
1569 	 * Allocating count locks is enough because no FL_POSIX locks can be
1570 	 * added to the list while we are holding cinode->lock_sem that
1571 	 * protects locking operations of this inode.
1572 	 */
1573 	for (i = 0; i < count; i++) {
1574 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1575 		if (!lck) {
1576 			rc = -ENOMEM;
1577 			goto err_out;
1578 		}
1579 		list_add_tail(&lck->llist, &locks_to_send);
1580 	}
1581 
1582 	el = locks_to_send.next;
1583 	spin_lock(&flctx->flc_lock);
1584 	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1585 		if (el == &locks_to_send) {
1586 			/*
1587 			 * The list ended. We don't have enough allocated
1588 			 * structures - something is really wrong.
1589 			 */
1590 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1591 			break;
1592 		}
1593 		length = cifs_flock_len(flock);
1594 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1595 			type = CIFS_RDLCK;
1596 		else
1597 			type = CIFS_WRLCK;
1598 		lck = list_entry(el, struct lock_to_push, llist);
1599 		lck->pid = hash_lockowner(flock->fl_owner);
1600 		lck->netfid = cfile->fid.netfid;
1601 		lck->length = length;
1602 		lck->type = type;
1603 		lck->offset = flock->fl_start;
1604 	}
1605 	spin_unlock(&flctx->flc_lock);
1606 
1607 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1608 		int stored_rc;
1609 
1610 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1611 					     lck->offset, lck->length, NULL,
1612 					     lck->type, 0);
1613 		if (stored_rc)
1614 			rc = stored_rc;
1615 		list_del(&lck->llist);
1616 		kfree(lck);
1617 	}
1618 
1619 out:
1620 	free_xid(xid);
1621 	return rc;
1622 err_out:
1623 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1624 		list_del(&lck->llist);
1625 		kfree(lck);
1626 	}
1627 	goto out;
1628 }
1629 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1630 
1631 static int
1632 cifs_push_locks(struct cifsFileInfo *cfile)
1633 {
1634 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1635 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1636 	int rc = 0;
1637 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1638 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1639 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1640 
1641 	/* we are going to update can_cache_brlcks here - need a write access */
1642 	cifs_down_write(&cinode->lock_sem);
1643 	if (!cinode->can_cache_brlcks) {
1644 		up_write(&cinode->lock_sem);
1645 		return rc;
1646 	}
1647 
1648 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1649 	if (cap_unix(tcon->ses) &&
1650 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1651 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1652 		rc = cifs_push_posix_locks(cfile);
1653 	else
1654 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1655 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1656 
1657 	cinode->can_cache_brlcks = false;
1658 	up_write(&cinode->lock_sem);
1659 	return rc;
1660 }
1661 
1662 static void
1663 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1664 		bool *wait_flag, struct TCP_Server_Info *server)
1665 {
1666 	if (flock->fl_flags & FL_POSIX)
1667 		cifs_dbg(FYI, "Posix\n");
1668 	if (flock->fl_flags & FL_FLOCK)
1669 		cifs_dbg(FYI, "Flock\n");
1670 	if (flock->fl_flags & FL_SLEEP) {
1671 		cifs_dbg(FYI, "Blocking lock\n");
1672 		*wait_flag = true;
1673 	}
1674 	if (flock->fl_flags & FL_ACCESS)
1675 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1676 	if (flock->fl_flags & FL_LEASE)
1677 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1678 	if (flock->fl_flags &
1679 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1680 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1681 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1682 
1683 	*type = server->vals->large_lock_type;
1684 	if (flock->fl_type == F_WRLCK) {
1685 		cifs_dbg(FYI, "F_WRLCK\n");
1686 		*type |= server->vals->exclusive_lock_type;
1687 		*lock = 1;
1688 	} else if (flock->fl_type == F_UNLCK) {
1689 		cifs_dbg(FYI, "F_UNLCK\n");
1690 		*type |= server->vals->unlock_lock_type;
1691 		*unlock = 1;
1692 		/* Check if unlock includes more than one lock range */
1693 	} else if (flock->fl_type == F_RDLCK) {
1694 		cifs_dbg(FYI, "F_RDLCK\n");
1695 		*type |= server->vals->shared_lock_type;
1696 		*lock = 1;
1697 	} else if (flock->fl_type == F_EXLCK) {
1698 		cifs_dbg(FYI, "F_EXLCK\n");
1699 		*type |= server->vals->exclusive_lock_type;
1700 		*lock = 1;
1701 	} else if (flock->fl_type == F_SHLCK) {
1702 		cifs_dbg(FYI, "F_SHLCK\n");
1703 		*type |= server->vals->shared_lock_type;
1704 		*lock = 1;
1705 	} else
1706 		cifs_dbg(FYI, "Unknown type of lock\n");
1707 }
1708 
1709 static int
1710 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1711 	   bool wait_flag, bool posix_lck, unsigned int xid)
1712 {
1713 	int rc = 0;
1714 	__u64 length = cifs_flock_len(flock);
1715 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1716 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1717 	struct TCP_Server_Info *server = tcon->ses->server;
1718 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1719 	__u16 netfid = cfile->fid.netfid;
1720 
1721 	if (posix_lck) {
1722 		int posix_lock_type;
1723 
1724 		rc = cifs_posix_lock_test(file, flock);
1725 		if (!rc)
1726 			return rc;
1727 
1728 		if (type & server->vals->shared_lock_type)
1729 			posix_lock_type = CIFS_RDLCK;
1730 		else
1731 			posix_lock_type = CIFS_WRLCK;
1732 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1733 				      hash_lockowner(flock->fl_owner),
1734 				      flock->fl_start, length, flock,
1735 				      posix_lock_type, wait_flag);
1736 		return rc;
1737 	}
1738 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1739 
1740 	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1741 	if (!rc)
1742 		return rc;
1743 
1744 	/* BB we could chain these into one lock request BB */
1745 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1746 				    1, 0, false);
1747 	if (rc == 0) {
1748 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1749 					    type, 0, 1, false);
1750 		flock->fl_type = F_UNLCK;
1751 		if (rc != 0)
1752 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1753 				 rc);
1754 		return 0;
1755 	}
1756 
1757 	if (type & server->vals->shared_lock_type) {
1758 		flock->fl_type = F_WRLCK;
1759 		return 0;
1760 	}
1761 
1762 	type &= ~server->vals->exclusive_lock_type;
1763 
1764 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1765 				    type | server->vals->shared_lock_type,
1766 				    1, 0, false);
1767 	if (rc == 0) {
1768 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1769 			type | server->vals->shared_lock_type, 0, 1, false);
1770 		flock->fl_type = F_RDLCK;
1771 		if (rc != 0)
1772 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1773 				 rc);
1774 	} else
1775 		flock->fl_type = F_WRLCK;
1776 
1777 	return 0;
1778 }
1779 
1780 void
1781 cifs_move_llist(struct list_head *source, struct list_head *dest)
1782 {
1783 	struct list_head *li, *tmp;
1784 	list_for_each_safe(li, tmp, source)
1785 		list_move(li, dest);
1786 }
1787 
1788 void
1789 cifs_free_llist(struct list_head *llist)
1790 {
1791 	struct cifsLockInfo *li, *tmp;
1792 	list_for_each_entry_safe(li, tmp, llist, llist) {
1793 		cifs_del_lock_waiters(li);
1794 		list_del(&li->llist);
1795 		kfree(li);
1796 	}
1797 }
1798 
1799 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1800 int
1801 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1802 		  unsigned int xid)
1803 {
1804 	int rc = 0, stored_rc;
1805 	static const int types[] = {
1806 		LOCKING_ANDX_LARGE_FILES,
1807 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1808 	};
1809 	unsigned int i;
1810 	unsigned int max_num, num, max_buf;
1811 	LOCKING_ANDX_RANGE *buf, *cur;
1812 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1813 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1814 	struct cifsLockInfo *li, *tmp;
1815 	__u64 length = cifs_flock_len(flock);
1816 	struct list_head tmp_llist;
1817 
1818 	INIT_LIST_HEAD(&tmp_llist);
1819 
1820 	/*
1821 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1822 	 * and check it before using.
1823 	 */
1824 	max_buf = tcon->ses->server->maxBuf;
1825 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1826 		return -EINVAL;
1827 
1828 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1829 		     PAGE_SIZE);
1830 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1831 			PAGE_SIZE);
1832 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1833 						sizeof(LOCKING_ANDX_RANGE);
1834 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1835 	if (!buf)
1836 		return -ENOMEM;
1837 
1838 	cifs_down_write(&cinode->lock_sem);
1839 	for (i = 0; i < 2; i++) {
1840 		cur = buf;
1841 		num = 0;
1842 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1843 			if (flock->fl_start > li->offset ||
1844 			    (flock->fl_start + length) <
1845 			    (li->offset + li->length))
1846 				continue;
1847 			if (current->tgid != li->pid)
1848 				continue;
1849 			if (types[i] != li->type)
1850 				continue;
1851 			if (cinode->can_cache_brlcks) {
1852 				/*
1853 				 * We can cache brlock requests - simply remove
1854 				 * a lock from the file's list.
1855 				 */
1856 				list_del(&li->llist);
1857 				cifs_del_lock_waiters(li);
1858 				kfree(li);
1859 				continue;
1860 			}
1861 			cur->Pid = cpu_to_le16(li->pid);
1862 			cur->LengthLow = cpu_to_le32((u32)li->length);
1863 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1864 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1865 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1866 			/*
1867 			 * We need to save a lock here to let us add it again to
1868 			 * the file's list if the unlock range request fails on
1869 			 * the server.
1870 			 */
1871 			list_move(&li->llist, &tmp_llist);
1872 			if (++num == max_num) {
1873 				stored_rc = cifs_lockv(xid, tcon,
1874 						       cfile->fid.netfid,
1875 						       li->type, num, 0, buf);
1876 				if (stored_rc) {
1877 					/*
1878 					 * We failed on the unlock range
1879 					 * request - add all locks from the tmp
1880 					 * list to the head of the file's list.
1881 					 */
1882 					cifs_move_llist(&tmp_llist,
1883 							&cfile->llist->locks);
1884 					rc = stored_rc;
1885 				} else
1886 					/*
1887 					 * The unlock range request succeed -
1888 					 * free the tmp list.
1889 					 */
1890 					cifs_free_llist(&tmp_llist);
1891 				cur = buf;
1892 				num = 0;
1893 			} else
1894 				cur++;
1895 		}
1896 		if (num) {
1897 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1898 					       types[i], num, 0, buf);
1899 			if (stored_rc) {
1900 				cifs_move_llist(&tmp_llist,
1901 						&cfile->llist->locks);
1902 				rc = stored_rc;
1903 			} else
1904 				cifs_free_llist(&tmp_llist);
1905 		}
1906 	}
1907 
1908 	up_write(&cinode->lock_sem);
1909 	kfree(buf);
1910 	return rc;
1911 }
1912 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1913 
1914 static int
1915 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1916 	   bool wait_flag, bool posix_lck, int lock, int unlock,
1917 	   unsigned int xid)
1918 {
1919 	int rc = 0;
1920 	__u64 length = cifs_flock_len(flock);
1921 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1922 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1923 	struct TCP_Server_Info *server = tcon->ses->server;
1924 	struct inode *inode = d_inode(cfile->dentry);
1925 
1926 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1927 	if (posix_lck) {
1928 		int posix_lock_type;
1929 
1930 		rc = cifs_posix_lock_set(file, flock);
1931 		if (rc <= FILE_LOCK_DEFERRED)
1932 			return rc;
1933 
1934 		if (type & server->vals->shared_lock_type)
1935 			posix_lock_type = CIFS_RDLCK;
1936 		else
1937 			posix_lock_type = CIFS_WRLCK;
1938 
1939 		if (unlock == 1)
1940 			posix_lock_type = CIFS_UNLCK;
1941 
1942 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1943 				      hash_lockowner(flock->fl_owner),
1944 				      flock->fl_start, length,
1945 				      NULL, posix_lock_type, wait_flag);
1946 		goto out;
1947 	}
1948 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1949 	if (lock) {
1950 		struct cifsLockInfo *lock;
1951 
1952 		lock = cifs_lock_init(flock->fl_start, length, type,
1953 				      flock->fl_flags);
1954 		if (!lock)
1955 			return -ENOMEM;
1956 
1957 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
1958 		if (rc < 0) {
1959 			kfree(lock);
1960 			return rc;
1961 		}
1962 		if (!rc)
1963 			goto out;
1964 
1965 		/*
1966 		 * Windows 7 server can delay breaking lease from read to None
1967 		 * if we set a byte-range lock on a file - break it explicitly
1968 		 * before sending the lock to the server to be sure the next
1969 		 * read won't conflict with non-overlapted locks due to
1970 		 * pagereading.
1971 		 */
1972 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1973 					CIFS_CACHE_READ(CIFS_I(inode))) {
1974 			cifs_zap_mapping(inode);
1975 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1976 				 inode);
1977 			CIFS_I(inode)->oplock = 0;
1978 		}
1979 
1980 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1981 					    type, 1, 0, wait_flag);
1982 		if (rc) {
1983 			kfree(lock);
1984 			return rc;
1985 		}
1986 
1987 		cifs_lock_add(cfile, lock);
1988 	} else if (unlock)
1989 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
1990 
1991 out:
1992 	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1993 		/*
1994 		 * If this is a request to remove all locks because we
1995 		 * are closing the file, it doesn't matter if the
1996 		 * unlocking failed as both cifs.ko and the SMB server
1997 		 * remove the lock on file close
1998 		 */
1999 		if (rc) {
2000 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2001 			if (!(flock->fl_flags & FL_CLOSE))
2002 				return rc;
2003 		}
2004 		rc = locks_lock_file_wait(file, flock);
2005 	}
2006 	return rc;
2007 }
2008 
2009 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2010 {
2011 	int rc, xid;
2012 	int lock = 0, unlock = 0;
2013 	bool wait_flag = false;
2014 	bool posix_lck = false;
2015 	struct cifs_sb_info *cifs_sb;
2016 	struct cifs_tcon *tcon;
2017 	struct cifsFileInfo *cfile;
2018 	__u32 type;
2019 
2020 	xid = get_xid();
2021 
2022 	if (!(fl->fl_flags & FL_FLOCK)) {
2023 		rc = -ENOLCK;
2024 		free_xid(xid);
2025 		return rc;
2026 	}
2027 
2028 	cfile = (struct cifsFileInfo *)file->private_data;
2029 	tcon = tlink_tcon(cfile->tlink);
2030 
2031 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2032 			tcon->ses->server);
2033 	cifs_sb = CIFS_FILE_SB(file);
2034 
2035 	if (cap_unix(tcon->ses) &&
2036 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2037 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2038 		posix_lck = true;
2039 
2040 	if (!lock && !unlock) {
2041 		/*
2042 		 * if no lock or unlock then nothing to do since we do not
2043 		 * know what it is
2044 		 */
2045 		rc = -EOPNOTSUPP;
2046 		free_xid(xid);
2047 		return rc;
2048 	}
2049 
2050 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2051 			xid);
2052 	free_xid(xid);
2053 	return rc;
2054 
2055 
2056 }
2057 
2058 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2059 {
2060 	int rc, xid;
2061 	int lock = 0, unlock = 0;
2062 	bool wait_flag = false;
2063 	bool posix_lck = false;
2064 	struct cifs_sb_info *cifs_sb;
2065 	struct cifs_tcon *tcon;
2066 	struct cifsFileInfo *cfile;
2067 	__u32 type;
2068 
2069 	rc = -EACCES;
2070 	xid = get_xid();
2071 
2072 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2073 		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2074 		 (long long)flock->fl_end);
2075 
2076 	cfile = (struct cifsFileInfo *)file->private_data;
2077 	tcon = tlink_tcon(cfile->tlink);
2078 
2079 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2080 			tcon->ses->server);
2081 	cifs_sb = CIFS_FILE_SB(file);
2082 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2083 
2084 	if (cap_unix(tcon->ses) &&
2085 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2086 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2087 		posix_lck = true;
2088 	/*
2089 	 * BB add code here to normalize offset and length to account for
2090 	 * negative length which we can not accept over the wire.
2091 	 */
2092 	if (IS_GETLK(cmd)) {
2093 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2094 		free_xid(xid);
2095 		return rc;
2096 	}
2097 
2098 	if (!lock && !unlock) {
2099 		/*
2100 		 * if no lock or unlock then nothing to do since we do not
2101 		 * know what it is
2102 		 */
2103 		free_xid(xid);
2104 		return -EOPNOTSUPP;
2105 	}
2106 
2107 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2108 			xid);
2109 	free_xid(xid);
2110 	return rc;
2111 }
2112 
2113 /*
2114  * update the file size (if needed) after a write. Should be called with
2115  * the inode->i_lock held
2116  */
2117 void
2118 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2119 		      unsigned int bytes_written)
2120 {
2121 	loff_t end_of_write = offset + bytes_written;
2122 
2123 	if (end_of_write > cifsi->netfs.remote_i_size)
2124 		netfs_resize_file(&cifsi->netfs, end_of_write, true);
2125 }
2126 
2127 static ssize_t
2128 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2129 	   size_t write_size, loff_t *offset)
2130 {
2131 	int rc = 0;
2132 	unsigned int bytes_written = 0;
2133 	unsigned int total_written;
2134 	struct cifs_tcon *tcon;
2135 	struct TCP_Server_Info *server;
2136 	unsigned int xid;
2137 	struct dentry *dentry = open_file->dentry;
2138 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2139 	struct cifs_io_parms io_parms = {0};
2140 
2141 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2142 		 write_size, *offset, dentry);
2143 
2144 	tcon = tlink_tcon(open_file->tlink);
2145 	server = tcon->ses->server;
2146 
2147 	if (!server->ops->sync_write)
2148 		return -ENOSYS;
2149 
2150 	xid = get_xid();
2151 
2152 	for (total_written = 0; write_size > total_written;
2153 	     total_written += bytes_written) {
2154 		rc = -EAGAIN;
2155 		while (rc == -EAGAIN) {
2156 			struct kvec iov[2];
2157 			unsigned int len;
2158 
2159 			if (open_file->invalidHandle) {
2160 				/* we could deadlock if we called
2161 				   filemap_fdatawait from here so tell
2162 				   reopen_file not to flush data to
2163 				   server now */
2164 				rc = cifs_reopen_file(open_file, false);
2165 				if (rc != 0)
2166 					break;
2167 			}
2168 
2169 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2170 				  (unsigned int)write_size - total_written);
2171 			/* iov[0] is reserved for smb header */
2172 			iov[1].iov_base = (char *)write_data + total_written;
2173 			iov[1].iov_len = len;
2174 			io_parms.pid = pid;
2175 			io_parms.tcon = tcon;
2176 			io_parms.offset = *offset;
2177 			io_parms.length = len;
2178 			rc = server->ops->sync_write(xid, &open_file->fid,
2179 					&io_parms, &bytes_written, iov, 1);
2180 		}
2181 		if (rc || (bytes_written == 0)) {
2182 			if (total_written)
2183 				break;
2184 			else {
2185 				free_xid(xid);
2186 				return rc;
2187 			}
2188 		} else {
2189 			spin_lock(&d_inode(dentry)->i_lock);
2190 			cifs_update_eof(cifsi, *offset, bytes_written);
2191 			spin_unlock(&d_inode(dentry)->i_lock);
2192 			*offset += bytes_written;
2193 		}
2194 	}
2195 
2196 	cifs_stats_bytes_written(tcon, total_written);
2197 
2198 	if (total_written > 0) {
2199 		spin_lock(&d_inode(dentry)->i_lock);
2200 		if (*offset > d_inode(dentry)->i_size) {
2201 			i_size_write(d_inode(dentry), *offset);
2202 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2203 		}
2204 		spin_unlock(&d_inode(dentry)->i_lock);
2205 	}
2206 	mark_inode_dirty_sync(d_inode(dentry));
2207 	free_xid(xid);
2208 	return total_written;
2209 }
2210 
2211 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2212 					bool fsuid_only)
2213 {
2214 	struct cifsFileInfo *open_file = NULL;
2215 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2216 
2217 	/* only filter by fsuid on multiuser mounts */
2218 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2219 		fsuid_only = false;
2220 
2221 	spin_lock(&cifs_inode->open_file_lock);
2222 	/* we could simply get the first_list_entry since write-only entries
2223 	   are always at the end of the list but since the first entry might
2224 	   have a close pending, we go through the whole list */
2225 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2226 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2227 			continue;
2228 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2229 			if ((!open_file->invalidHandle)) {
2230 				/* found a good file */
2231 				/* lock it so it will not be closed on us */
2232 				cifsFileInfo_get(open_file);
2233 				spin_unlock(&cifs_inode->open_file_lock);
2234 				return open_file;
2235 			} /* else might as well continue, and look for
2236 			     another, or simply have the caller reopen it
2237 			     again rather than trying to fix this handle */
2238 		} else /* write only file */
2239 			break; /* write only files are last so must be done */
2240 	}
2241 	spin_unlock(&cifs_inode->open_file_lock);
2242 	return NULL;
2243 }
2244 
2245 /* Return -EBADF if no handle is found and general rc otherwise */
2246 int
2247 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2248 		       struct cifsFileInfo **ret_file)
2249 {
2250 	struct cifsFileInfo *open_file, *inv_file = NULL;
2251 	struct cifs_sb_info *cifs_sb;
2252 	bool any_available = false;
2253 	int rc = -EBADF;
2254 	unsigned int refind = 0;
2255 	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2256 	bool with_delete = flags & FIND_WR_WITH_DELETE;
2257 	*ret_file = NULL;
2258 
2259 	/*
2260 	 * Having a null inode here (because mapping->host was set to zero by
2261 	 * the VFS or MM) should not happen but we had reports of on oops (due
2262 	 * to it being zero) during stress testcases so we need to check for it
2263 	 */
2264 
2265 	if (cifs_inode == NULL) {
2266 		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2267 		dump_stack();
2268 		return rc;
2269 	}
2270 
2271 	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2272 
2273 	/* only filter by fsuid on multiuser mounts */
2274 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2275 		fsuid_only = false;
2276 
2277 	spin_lock(&cifs_inode->open_file_lock);
2278 refind_writable:
2279 	if (refind > MAX_REOPEN_ATT) {
2280 		spin_unlock(&cifs_inode->open_file_lock);
2281 		return rc;
2282 	}
2283 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2284 		if (!any_available && open_file->pid != current->tgid)
2285 			continue;
2286 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2287 			continue;
2288 		if (with_delete && !(open_file->fid.access & DELETE))
2289 			continue;
2290 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2291 			if (!open_file->invalidHandle) {
2292 				/* found a good writable file */
2293 				cifsFileInfo_get(open_file);
2294 				spin_unlock(&cifs_inode->open_file_lock);
2295 				*ret_file = open_file;
2296 				return 0;
2297 			} else {
2298 				if (!inv_file)
2299 					inv_file = open_file;
2300 			}
2301 		}
2302 	}
2303 	/* couldn't find useable FH with same pid, try any available */
2304 	if (!any_available) {
2305 		any_available = true;
2306 		goto refind_writable;
2307 	}
2308 
2309 	if (inv_file) {
2310 		any_available = false;
2311 		cifsFileInfo_get(inv_file);
2312 	}
2313 
2314 	spin_unlock(&cifs_inode->open_file_lock);
2315 
2316 	if (inv_file) {
2317 		rc = cifs_reopen_file(inv_file, false);
2318 		if (!rc) {
2319 			*ret_file = inv_file;
2320 			return 0;
2321 		}
2322 
2323 		spin_lock(&cifs_inode->open_file_lock);
2324 		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2325 		spin_unlock(&cifs_inode->open_file_lock);
2326 		cifsFileInfo_put(inv_file);
2327 		++refind;
2328 		inv_file = NULL;
2329 		spin_lock(&cifs_inode->open_file_lock);
2330 		goto refind_writable;
2331 	}
2332 
2333 	return rc;
2334 }
2335 
2336 struct cifsFileInfo *
2337 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2338 {
2339 	struct cifsFileInfo *cfile;
2340 	int rc;
2341 
2342 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2343 	if (rc)
2344 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2345 
2346 	return cfile;
2347 }
2348 
2349 int
2350 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2351 		       int flags,
2352 		       struct cifsFileInfo **ret_file)
2353 {
2354 	struct cifsFileInfo *cfile;
2355 	void *page = alloc_dentry_path();
2356 
2357 	*ret_file = NULL;
2358 
2359 	spin_lock(&tcon->open_file_lock);
2360 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2361 		struct cifsInodeInfo *cinode;
2362 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2363 		if (IS_ERR(full_path)) {
2364 			spin_unlock(&tcon->open_file_lock);
2365 			free_dentry_path(page);
2366 			return PTR_ERR(full_path);
2367 		}
2368 		if (strcmp(full_path, name))
2369 			continue;
2370 
2371 		cinode = CIFS_I(d_inode(cfile->dentry));
2372 		spin_unlock(&tcon->open_file_lock);
2373 		free_dentry_path(page);
2374 		return cifs_get_writable_file(cinode, flags, ret_file);
2375 	}
2376 
2377 	spin_unlock(&tcon->open_file_lock);
2378 	free_dentry_path(page);
2379 	return -ENOENT;
2380 }
2381 
2382 int
2383 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2384 		       struct cifsFileInfo **ret_file)
2385 {
2386 	struct cifsFileInfo *cfile;
2387 	void *page = alloc_dentry_path();
2388 
2389 	*ret_file = NULL;
2390 
2391 	spin_lock(&tcon->open_file_lock);
2392 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2393 		struct cifsInodeInfo *cinode;
2394 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2395 		if (IS_ERR(full_path)) {
2396 			spin_unlock(&tcon->open_file_lock);
2397 			free_dentry_path(page);
2398 			return PTR_ERR(full_path);
2399 		}
2400 		if (strcmp(full_path, name))
2401 			continue;
2402 
2403 		cinode = CIFS_I(d_inode(cfile->dentry));
2404 		spin_unlock(&tcon->open_file_lock);
2405 		free_dentry_path(page);
2406 		*ret_file = find_readable_file(cinode, 0);
2407 		return *ret_file ? 0 : -ENOENT;
2408 	}
2409 
2410 	spin_unlock(&tcon->open_file_lock);
2411 	free_dentry_path(page);
2412 	return -ENOENT;
2413 }
2414 
2415 void
2416 cifs_writedata_release(struct kref *refcount)
2417 {
2418 	struct cifs_writedata *wdata = container_of(refcount,
2419 					struct cifs_writedata, refcount);
2420 #ifdef CONFIG_CIFS_SMB_DIRECT
2421 	if (wdata->mr) {
2422 		smbd_deregister_mr(wdata->mr);
2423 		wdata->mr = NULL;
2424 	}
2425 #endif
2426 
2427 	if (wdata->cfile)
2428 		cifsFileInfo_put(wdata->cfile);
2429 
2430 	kfree(wdata);
2431 }
2432 
2433 /*
2434  * Write failed with a retryable error. Resend the write request. It's also
2435  * possible that the page was redirtied so re-clean the page.
2436  */
2437 static void
2438 cifs_writev_requeue(struct cifs_writedata *wdata)
2439 {
2440 	int rc = 0;
2441 	struct inode *inode = d_inode(wdata->cfile->dentry);
2442 	struct TCP_Server_Info *server;
2443 	unsigned int rest_len = wdata->bytes;
2444 	loff_t fpos = wdata->offset;
2445 
2446 	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2447 	do {
2448 		struct cifs_writedata *wdata2;
2449 		unsigned int wsize, cur_len;
2450 
2451 		wsize = server->ops->wp_retry_size(inode);
2452 		if (wsize < rest_len) {
2453 			if (wsize < PAGE_SIZE) {
2454 				rc = -EOPNOTSUPP;
2455 				break;
2456 			}
2457 			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2458 		} else {
2459 			cur_len = rest_len;
2460 		}
2461 
2462 		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2463 		if (!wdata2) {
2464 			rc = -ENOMEM;
2465 			break;
2466 		}
2467 
2468 		wdata2->sync_mode = wdata->sync_mode;
2469 		wdata2->offset	= fpos;
2470 		wdata2->bytes	= cur_len;
2471 		wdata2->iter	= wdata->iter;
2472 
2473 		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2474 		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2475 
2476 		if (iov_iter_is_xarray(&wdata2->iter))
2477 			/* Check for pages having been redirtied and clean
2478 			 * them.  We can do this by walking the xarray.  If
2479 			 * it's not an xarray, then it's a DIO and we shouldn't
2480 			 * be mucking around with the page bits.
2481 			 */
2482 			cifs_undirty_folios(inode, fpos, cur_len);
2483 
2484 		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2485 					    &wdata2->cfile);
2486 		if (!wdata2->cfile) {
2487 			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2488 				 rc);
2489 			if (!is_retryable_error(rc))
2490 				rc = -EBADF;
2491 		} else {
2492 			wdata2->pid = wdata2->cfile->pid;
2493 			rc = server->ops->async_writev(wdata2,
2494 						       cifs_writedata_release);
2495 		}
2496 
2497 		kref_put(&wdata2->refcount, cifs_writedata_release);
2498 		if (rc) {
2499 			if (is_retryable_error(rc))
2500 				continue;
2501 			fpos += cur_len;
2502 			rest_len -= cur_len;
2503 			break;
2504 		}
2505 
2506 		fpos += cur_len;
2507 		rest_len -= cur_len;
2508 	} while (rest_len > 0);
2509 
2510 	/* Clean up remaining pages from the original wdata */
2511 	if (iov_iter_is_xarray(&wdata->iter))
2512 		cifs_pages_write_failed(inode, fpos, rest_len);
2513 
2514 	if (rc != 0 && !is_retryable_error(rc))
2515 		mapping_set_error(inode->i_mapping, rc);
2516 	kref_put(&wdata->refcount, cifs_writedata_release);
2517 }
2518 
2519 void
2520 cifs_writev_complete(struct work_struct *work)
2521 {
2522 	struct cifs_writedata *wdata = container_of(work,
2523 						struct cifs_writedata, work);
2524 	struct inode *inode = d_inode(wdata->cfile->dentry);
2525 
2526 	if (wdata->result == 0) {
2527 		spin_lock(&inode->i_lock);
2528 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2529 		spin_unlock(&inode->i_lock);
2530 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2531 					 wdata->bytes);
2532 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2533 		return cifs_writev_requeue(wdata);
2534 
2535 	if (wdata->result == -EAGAIN)
2536 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2537 	else if (wdata->result < 0)
2538 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2539 	else
2540 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2541 
2542 	if (wdata->result != -EAGAIN)
2543 		mapping_set_error(inode->i_mapping, wdata->result);
2544 	kref_put(&wdata->refcount, cifs_writedata_release);
2545 }
2546 
2547 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2548 {
2549 	struct cifs_writedata *wdata;
2550 
2551 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2552 	if (wdata != NULL) {
2553 		kref_init(&wdata->refcount);
2554 		INIT_LIST_HEAD(&wdata->list);
2555 		init_completion(&wdata->done);
2556 		INIT_WORK(&wdata->work, complete);
2557 	}
2558 	return wdata;
2559 }
2560 
2561 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2562 {
2563 	struct address_space *mapping = page->mapping;
2564 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2565 	char *write_data;
2566 	int rc = -EFAULT;
2567 	int bytes_written = 0;
2568 	struct inode *inode;
2569 	struct cifsFileInfo *open_file;
2570 
2571 	if (!mapping || !mapping->host)
2572 		return -EFAULT;
2573 
2574 	inode = page->mapping->host;
2575 
2576 	offset += (loff_t)from;
2577 	write_data = kmap(page);
2578 	write_data += from;
2579 
2580 	if ((to > PAGE_SIZE) || (from > to)) {
2581 		kunmap(page);
2582 		return -EIO;
2583 	}
2584 
2585 	/* racing with truncate? */
2586 	if (offset > mapping->host->i_size) {
2587 		kunmap(page);
2588 		return 0; /* don't care */
2589 	}
2590 
2591 	/* check to make sure that we are not extending the file */
2592 	if (mapping->host->i_size - offset < (loff_t)to)
2593 		to = (unsigned)(mapping->host->i_size - offset);
2594 
2595 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2596 				    &open_file);
2597 	if (!rc) {
2598 		bytes_written = cifs_write(open_file, open_file->pid,
2599 					   write_data, to - from, &offset);
2600 		cifsFileInfo_put(open_file);
2601 		/* Does mm or vfs already set times? */
2602 		simple_inode_init_ts(inode);
2603 		if ((bytes_written > 0) && (offset))
2604 			rc = 0;
2605 		else if (bytes_written < 0)
2606 			rc = bytes_written;
2607 		else
2608 			rc = -EFAULT;
2609 	} else {
2610 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2611 		if (!is_retryable_error(rc))
2612 			rc = -EIO;
2613 	}
2614 
2615 	kunmap(page);
2616 	return rc;
2617 }
2618 
2619 /*
2620  * Extend the region to be written back to include subsequent contiguously
2621  * dirty pages if possible, but don't sleep while doing so.
 *
 * @mapping:   address space whose i_pages xarray is walked
 * @_count:    remaining page budget; reduced by the page count of every folio
 *             that is added, and the scan stops once it is <= 0
 * @start:     file position of the start of the region built so far
 * @max_pages: most pages that may still be added
 * @max_len:   the scan stops once the region length reaches this many bytes
 * @_len:      in: current length of the region; out: length after extension
 *
 * The scan begins at page index (@start + *@_len) / PAGE_SIZE.  Folios are
 * only taken with folio_trylock(); each folio that is added has its dirty
 * flag cleared and writeback started, and is unlocked again before return.
2622  */
2623 static void cifs_extend_writeback(struct address_space *mapping,
2624 				  long *_count,
2625 				  loff_t start,
2626 				  int max_pages,
2627 				  size_t max_len,
2628 				  unsigned int *_len)
2629 {
2630 	struct folio_batch batch;
2631 	struct folio *folio;
2632 	unsigned int psize, nr_pages;
2633 	size_t len = *_len;
2634 	pgoff_t index = (start + len) / PAGE_SIZE;
2635 	bool stop = true;
2636 	unsigned int i;
2637 	XA_STATE(xas, &mapping->i_pages, index);
2638 
2639 	folio_batch_init(&batch);
2640 
2641 	do {
2642 		/* Firstly, we gather up a batch of contiguous dirty pages
2643 		 * under the RCU read lock - but we can't clear the dirty flags
2644 		 * there if any of those pages are mapped.
2645 		 */
2646 		rcu_read_lock();
2647 
2648 		xas_for_each(&xas, folio, ULONG_MAX) {
			/* Assume this is the last folio until it is accepted */
2649 			stop = true;
2650 			if (xas_retry(&xas, folio))
2651 				continue;
2652 			if (xa_is_value(folio))
2653 				break;
			/* A gap in the indices ends the contiguous run */
2654 			if (folio->index != index)
2655 				break;
2656 			if (!folio_try_get_rcu(folio)) {
2657 				xas_reset(&xas);
2658 				continue;
2659 			}
2660 			nr_pages = folio_nr_pages(folio);
2661 			if (nr_pages > max_pages)
2662 				break;
2663 
2664 			/* Has the page moved or been split? */
2665 			if (unlikely(folio != xas_reload(&xas))) {
2666 				folio_put(folio);
2667 				break;
2668 			}
2669 
			/* Never sleep on the folio lock; give up instead */
2670 			if (!folio_trylock(folio)) {
2671 				folio_put(folio);
2672 				break;
2673 			}
2674 			if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
2675 				folio_unlock(folio);
2676 				folio_put(folio);
2677 				break;
2678 			}
2679 
			/* Accept the folio and account for it */
2680 			max_pages -= nr_pages;
2681 			psize = folio_size(folio);
2682 			len += psize;
2683 			stop = false;
2684 			if (max_pages <= 0 || len >= max_len || *_count <= 0)
2685 				stop = true;
2686 
2687 			index += nr_pages;
			/* folio_batch_add() returning 0 ends this pass; the
			 * outer loop runs again unless stop was set.
			 */
2688 			if (!folio_batch_add(&batch, folio))
2689 				break;
2690 			if (stop)
2691 				break;
2692 		}
2693 
2694 		if (!stop)
2695 			xas_pause(&xas);
2696 		rcu_read_unlock();
2697 
2698 		/* Now, if we obtained any pages, we can shift them to being
2699 		 * writable and mark them for caching.
2700 		 */
2701 		if (!folio_batch_count(&batch))
2702 			break;
2703 
2704 		for (i = 0; i < folio_batch_count(&batch); i++) {
2705 			folio = batch.folios[i];
2706 			/* The folio should be locked, dirty and not undergoing
2707 			 * writeback from the loop above.
2708 			 */
2709 			if (!folio_clear_dirty_for_io(folio))
2710 				WARN_ON(1);
2711 			folio_start_writeback(folio);
2712 
2713 			*_count -= folio_nr_pages(folio);
2714 			folio_unlock(folio);
2715 		}
2716 
		/* Drops the references taken by folio_try_get_rcu() above */
2717 		folio_batch_release(&batch);
2718 		cond_resched();
2719 	} while (!stop);
2720 
2721 	*_len = len;
2722 }
2723 
2724 /*
2725  * Write back the locked page and any subsequent non-locked dirty pages.
 *
 * @mapping: address space being written back
 * @wbc:     writeback control; nr_to_write is the page budget and is updated
 *           on success, sync_mode is copied into the write request
 * @folio:   locked folio at which the write begins
 * @start:   file position of the write
 * @end:     last file position of the range being written back
 *
 * Writeback is started on @folio, a writable handle and credits are obtained
 * and, if @start is below i_size, the region is extended with
 * cifs_extend_writeback() and submitted with the server's async_writev().  If
 * @start is at or beyond i_size the region is just marked as written back.
 *
 * Returns the number of bytes in the region when rc ends up 0, otherwise the
 * error.  On a retryable error the pages are redirtied; on any other error
 * they are marked failed and the error is recorded on @mapping.
 *
 * NOTE(review): @folio is unlocked only by the folio_unlock() call in the
 * body; the error gotos taken before it skip that call - confirm that the
 * caller or the redirty/failed helpers unlock it.
2726  */
2727 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2728 						 struct writeback_control *wbc,
2729 						 struct folio *folio,
2730 						 loff_t start, loff_t end)
2731 {
2732 	struct inode *inode = mapping->host;
2733 	struct TCP_Server_Info *server;
2734 	struct cifs_writedata *wdata;
2735 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2736 	struct cifs_credits credits_on_stack;
2737 	struct cifs_credits *credits = &credits_on_stack;
2738 	struct cifsFileInfo *cfile = NULL;
2739 	unsigned int xid, wsize, len;
2740 	loff_t i_size = i_size_read(inode);
2741 	size_t max_len;
2742 	long count = wbc->nr_to_write;
2743 	int rc;
2744 
2745 	/* The folio should be locked, dirty and not undergoing writeback. */
2746 	folio_start_writeback(folio);
2747 
2748 	count -= folio_nr_pages(folio);
2749 	len = folio_size(folio);
2750 
2751 	xid = get_xid();
2752 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2753 
2754 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2755 	if (rc) {
2756 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2757 		goto err_xid;
2758 	}
2759 
2760 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2761 					   &wsize, credits);
2762 	if (rc != 0)
2763 		goto err_close;
2764 
2765 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2766 	if (!wdata) {
2767 		rc = -ENOMEM;
2768 		goto err_uncredit;
2769 	}
2770 
2771 	wdata->sync_mode = wbc->sync_mode;
2772 	wdata->offset = folio_pos(folio);
2773 	wdata->pid = cfile->pid;
2774 	wdata->credits = credits_on_stack;
2775 	wdata->cfile = cfile;
2776 	wdata->server = server;
	/* wdata now holds the handle reference; cfile is cleared so that
	 * err_close does not put it as well.
	 */
2777 	cfile = NULL;
2778 
2779 	/* Find all consecutive lockable dirty pages, stopping when we find a
2780 	 * page that is not immediately lockable, is not dirty or is missing,
2781 	 * or we reach the end of the range.
2782 	 */
2783 	if (start < i_size) {
2784 		/* Trim the write to the EOF; the extra data is ignored.  Also
2785 		 * put an upper limit on the size of a single storedata op.
2786 		 */
2787 		max_len = wsize;
2788 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2789 		max_len = min_t(unsigned long long, max_len, i_size - start);
2790 
2791 		if (len < max_len) {
2792 			int max_pages = INT_MAX;
2793 
2794 #ifdef CONFIG_CIFS_SMB_DIRECT
2795 			if (server->smbd_conn)
2796 				max_pages = server->smbd_conn->max_frmr_depth;
2797 #endif
2798 			max_pages -= folio_nr_pages(folio);
2799 
2800 			if (max_pages > 0)
2801 				cifs_extend_writeback(mapping, &count, start,
2802 						      max_pages, max_len, &len);
2803 		}
2804 		len = min_t(loff_t, len, max_len);
2805 	}
2806 
2807 	wdata->bytes = len;
2808 
2809 	/* We now have a contiguous set of dirty pages, each with writeback
2810 	 * set; the first page is still locked at this point, but all the rest
2811 	 * have been unlocked.
2812 	 */
2813 	folio_unlock(folio);
2814 
2815 	if (start < i_size) {
2816 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2817 				start, len);
2818 
2819 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2820 		if (rc)
2821 			goto err_wdata;
2822 
2823 		if (wdata->cfile->invalidHandle)
2824 			rc = -EAGAIN;
2825 		else
2826 			rc = wdata->server->ops->async_writev(wdata,
2827 							      cifs_writedata_release);
		/* Submitted: drop our wdata reference here and skip the
		 * err_wdata and err_uncredit steps.
		 */
2828 		if (rc >= 0) {
2829 			kref_put(&wdata->refcount, cifs_writedata_release);
2830 			goto err_close;
2831 		}
2832 	} else {
2833 		/* The dirty region was entirely beyond the EOF. */
2834 		cifs_pages_written_back(inode, start, len);
2835 		rc = 0;
2836 	}
2837 
	/* Reached on failure and also, with rc == 0, from the beyond-EOF
	 * branch above.
	 */
2838 err_wdata:
2839 	kref_put(&wdata->refcount, cifs_writedata_release);
2840 err_uncredit:
2841 	add_credits_and_wake_if(server, credits, 0);
2842 err_close:
2843 	if (cfile)
2844 		cifsFileInfo_put(cfile);
2845 err_xid:
2846 	free_xid(xid);
2847 	if (rc == 0) {
2848 		wbc->nr_to_write = count;
2849 		rc = len;
2850 	} else if (is_retryable_error(rc)) {
2851 		cifs_pages_write_redirty(inode, start, len);
2852 	} else {
2853 		cifs_pages_write_failed(inode, start, len);
2854 		mapping_set_error(mapping, rc);
2855 	}
2856 	/* Indication to update ctime and mtime as close is deferred */
2857 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2858 	return rc;
2859 }
2860 
2861 /*
2862  * write a region of pages back to the server
 *
 * @mapping: address space to write back
 * @wbc:     writeback control; sync_mode selects between waiting for and
 *           skipping busy folios, and the loop ends once nr_to_write is <= 0
 * @start:   file position to begin at
 * @end:     last file position to consider
 * @_next:   set to the file position at which processing stopped
 *
 * Dirty-tagged folios are fetched in batches and each one is handed to
 * cifs_write_back_from_locked_folio().
 *
 * Returns 0 when the region was processed or when the function gave up after
 * skipping busy folios, otherwise the negative error from locking a folio or
 * from the write.
2863  */
2864 static int cifs_writepages_region(struct address_space *mapping,
2865 				  struct writeback_control *wbc,
2866 				  loff_t start, loff_t end, loff_t *_next)
2867 {
2868 	struct folio_batch fbatch;
2869 	int skips = 0;
2870 
2871 	folio_batch_init(&fbatch);
2872 	do {
2873 		int nr;
2874 		pgoff_t index = start / PAGE_SIZE;
2875 
2876 		nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
2877 					    PAGECACHE_TAG_DIRTY, &fbatch);
2878 		if (!nr)
2879 			break;
2880 
2881 		for (int i = 0; i < nr; i++) {
2882 			ssize_t ret;
2883 			struct folio *folio = fbatch.folios[i];
2884 
2885 redo_folio:
2886 			start = folio_pos(folio); /* May regress with THPs */
2887 
2888 			/* At this point we hold neither the i_pages lock nor the
2889 			 * page lock: the page may be truncated or invalidated
2890 			 * (changing page->mapping to NULL), or even swizzled
2891 			 * back from swapper_space to tmpfs file mapping
2892 			 */
			/* Anything but WB_SYNC_NONE may sleep on the folio lock */
2893 			if (wbc->sync_mode != WB_SYNC_NONE) {
2894 				ret = folio_lock_killable(folio);
2895 				if (ret < 0)
2896 					goto write_error;
2897 			} else {
2898 				if (!folio_trylock(folio))
2899 					goto skip_write;
2900 			}
2901 
			/* The folio left this mapping or was already cleaned */
2902 			if (folio->mapping != mapping ||
2903 			    !folio_test_dirty(folio)) {
2904 				start += folio_size(folio);
2905 				folio_unlock(folio);
2906 				continue;
2907 			}
2908 
2909 			if (folio_test_writeback(folio) ||
2910 			    folio_test_fscache(folio)) {
2911 				folio_unlock(folio);
2912 				if (wbc->sync_mode == WB_SYNC_NONE)
2913 					goto skip_write;
2914 
				/* Wait, then re-examine the same folio */
2915 				folio_wait_writeback(folio);
2916 #ifdef CONFIG_CIFS_FSCACHE
2917 				folio_wait_fscache(folio);
2918 #endif
2919 				goto redo_folio;
2920 			}
2921 
2922 			if (!folio_clear_dirty_for_io(folio))
2923 				/* We hold the page lock - it should've been dirty. */
2924 				WARN_ON(1);
2925 
2926 			ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
2927 			if (ret < 0)
2928 				goto write_error;
2929 
			/* ret is the number of bytes covered by the write */
2930 			start += ret;
2931 			continue;
2932 
			/* Reached only by goto: release the batch and report
			 * where we stopped.
			 */
2933 write_error:
2934 			folio_batch_release(&fbatch);
2935 			*_next = start;
2936 			return ret;
2937 
2938 skip_write:
2939 			/*
2940 			 * Too many skipped writes, or need to reschedule?
2941 			 * Treat it as a write error without an error code.
2942 			 */
2943 			if (skips >= 5 || need_resched()) {
2944 				ret = 0;
2945 				goto write_error;
2946 			}
2947 
2948 			/* Otherwise, just skip that folio and go on to the next */
2949 			skips++;
2950 			start += folio_size(folio);
2951 			continue;
2952 		}
2953 
2954 		folio_batch_release(&fbatch);
2955 		cond_resched();
2956 	} while (wbc->nr_to_write > 0);
2957 
2958 	*_next = start;
2959 	return 0;
2960 }
2961 
2962 /*
2963  * Write some of the pending data back to the server
2964  */
2965 static int cifs_writepages(struct address_space *mapping,
2966 			   struct writeback_control *wbc)
2967 {
2968 	loff_t start, next;
2969 	int ret;
2970 
2971 	/* We have to be careful as we can end up racing with setattr()
2972 	 * truncating the pagecache since the caller doesn't take a lock here
2973 	 * to prevent it.
2974 	 */
2975 
2976 	if (wbc->range_cyclic) {
2977 		start = mapping->writeback_index * PAGE_SIZE;
2978 		ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
2979 		if (ret == 0) {
2980 			mapping->writeback_index = next / PAGE_SIZE;
2981 			if (start > 0 && wbc->nr_to_write > 0) {
2982 				ret = cifs_writepages_region(mapping, wbc, 0,
2983 							     start, &next);
2984 				if (ret == 0)
2985 					mapping->writeback_index =
2986 						next / PAGE_SIZE;
2987 			}
2988 		}
2989 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
2990 		ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
2991 		if (wbc->nr_to_write > 0 && ret == 0)
2992 			mapping->writeback_index = next / PAGE_SIZE;
2993 	} else {
2994 		ret = cifs_writepages_region(mapping, wbc,
2995 					     wbc->range_start, wbc->range_end, &next);
2996 	}
2997 
2998 	return ret;
2999 }
3000 
3001 static int
3002 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3003 {
3004 	int rc;
3005 	unsigned int xid;
3006 
3007 	xid = get_xid();
3008 /* BB add check for wbc flags */
3009 	get_page(page);
3010 	if (!PageUptodate(page))
3011 		cifs_dbg(FYI, "ppw - page not up to date\n");
3012 
3013 	/*
3014 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3015 	 *
3016 	 * A writepage() implementation always needs to do either this,
3017 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3018 	 * the case of a failure.
3019 	 *
3020 	 * Just unlocking the page will cause the radix tree tag-bits
3021 	 * to fail to update with the state of the page correctly.
3022 	 */
3023 	set_page_writeback(page);
3024 retry_write:
3025 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3026 	if (is_retryable_error(rc)) {
3027 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3028 			goto retry_write;
3029 		redirty_page_for_writepage(wbc, page);
3030 	} else if (rc != 0) {
3031 		SetPageError(page);
3032 		mapping_set_error(page->mapping, rc);
3033 	} else {
3034 		SetPageUptodate(page);
3035 	}
3036 	end_page_writeback(page);
3037 	put_page(page);
3038 	free_xid(xid);
3039 	return rc;
3040 }
3041 
3042 static int cifs_write_end(struct file *file, struct address_space *mapping,
3043 			loff_t pos, unsigned len, unsigned copied,
3044 			struct page *page, void *fsdata)
3045 {
3046 	int rc;
3047 	struct inode *inode = mapping->host;
3048 	struct cifsFileInfo *cfile = file->private_data;
3049 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3050 	struct folio *folio = page_folio(page);
3051 	__u32 pid;
3052 
3053 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3054 		pid = cfile->pid;
3055 	else
3056 		pid = current->tgid;
3057 
3058 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3059 		 page, pos, copied);
3060 
3061 	if (folio_test_checked(folio)) {
3062 		if (copied == len)
3063 			folio_mark_uptodate(folio);
3064 		folio_clear_checked(folio);
3065 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3066 		folio_mark_uptodate(folio);
3067 
3068 	if (!folio_test_uptodate(folio)) {
3069 		char *page_data;
3070 		unsigned offset = pos & (PAGE_SIZE - 1);
3071 		unsigned int xid;
3072 
3073 		xid = get_xid();
3074 		/* this is probably better than directly calling
3075 		   partialpage_write since in this function the file handle is
3076 		   known which we might as well	leverage */
3077 		/* BB check if anything else missing out of ppw
3078 		   such as updating last write time */
3079 		page_data = kmap(page);
3080 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3081 		/* if (rc < 0) should we set writebehind rc? */
3082 		kunmap(page);
3083 
3084 		free_xid(xid);
3085 	} else {
3086 		rc = copied;
3087 		pos += copied;
3088 		set_page_dirty(page);
3089 	}
3090 
3091 	if (rc > 0) {
3092 		spin_lock(&inode->i_lock);
3093 		if (pos > inode->i_size) {
3094 			i_size_write(inode, pos);
3095 			inode->i_blocks = (512 - 1 + pos) >> 9;
3096 		}
3097 		spin_unlock(&inode->i_lock);
3098 	}
3099 
3100 	unlock_page(page);
3101 	put_page(page);
3102 	/* Indication to update ctime and mtime as close is deferred */
3103 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3104 
3105 	return rc;
3106 }
3107 
3108 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3109 		      int datasync)
3110 {
3111 	unsigned int xid;
3112 	int rc = 0;
3113 	struct cifs_tcon *tcon;
3114 	struct TCP_Server_Info *server;
3115 	struct cifsFileInfo *smbfile = file->private_data;
3116 	struct inode *inode = file_inode(file);
3117 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3118 
3119 	rc = file_write_and_wait_range(file, start, end);
3120 	if (rc) {
3121 		trace_cifs_fsync_err(inode->i_ino, rc);
3122 		return rc;
3123 	}
3124 
3125 	xid = get_xid();
3126 
3127 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3128 		 file, datasync);
3129 
3130 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3131 		rc = cifs_zap_mapping(inode);
3132 		if (rc) {
3133 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3134 			rc = 0; /* don't care about it in fsync */
3135 		}
3136 	}
3137 
3138 	tcon = tlink_tcon(smbfile->tlink);
3139 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3140 		server = tcon->ses->server;
3141 		if (server->ops->flush == NULL) {
3142 			rc = -ENOSYS;
3143 			goto strict_fsync_exit;
3144 		}
3145 
3146 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3147 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3148 			if (smbfile) {
3149 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3150 				cifsFileInfo_put(smbfile);
3151 			} else
3152 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3153 		} else
3154 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3155 	}
3156 
3157 strict_fsync_exit:
3158 	free_xid(xid);
3159 	return rc;
3160 }
3161 
3162 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3163 {
3164 	unsigned int xid;
3165 	int rc = 0;
3166 	struct cifs_tcon *tcon;
3167 	struct TCP_Server_Info *server;
3168 	struct cifsFileInfo *smbfile = file->private_data;
3169 	struct inode *inode = file_inode(file);
3170 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3171 
3172 	rc = file_write_and_wait_range(file, start, end);
3173 	if (rc) {
3174 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3175 		return rc;
3176 	}
3177 
3178 	xid = get_xid();
3179 
3180 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3181 		 file, datasync);
3182 
3183 	tcon = tlink_tcon(smbfile->tlink);
3184 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3185 		server = tcon->ses->server;
3186 		if (server->ops->flush == NULL) {
3187 			rc = -ENOSYS;
3188 			goto fsync_exit;
3189 		}
3190 
3191 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3192 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3193 			if (smbfile) {
3194 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3195 				cifsFileInfo_put(smbfile);
3196 			} else
3197 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3198 		} else
3199 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3200 	}
3201 
3202 fsync_exit:
3203 	free_xid(xid);
3204 	return rc;
3205 }
3206 
3207 /*
3208  * As file closes, flush all cached write data for this inode checking
3209  * for write behind errors.
3210  */
3211 int cifs_flush(struct file *file, fl_owner_t id)
3212 {
3213 	struct inode *inode = file_inode(file);
3214 	int rc = 0;
3215 
3216 	if (file->f_mode & FMODE_WRITE)
3217 		rc = filemap_write_and_wait(inode->i_mapping);
3218 
3219 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3220 	if (rc) {
3221 		/* get more nuanced writeback errors */
3222 		rc = filemap_check_wb_err(file->f_mapping, 0);
3223 		trace_cifs_flush_err(inode->i_ino, rc);
3224 	}
3225 	return rc;
3226 }
3227 
3228 static void
3229 cifs_uncached_writedata_release(struct kref *refcount)
3230 {
3231 	struct cifs_writedata *wdata = container_of(refcount,
3232 					struct cifs_writedata, refcount);
3233 
3234 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3235 	cifs_writedata_release(refcount);
3236 }
3237 
3238 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3239 
3240 static void
3241 cifs_uncached_writev_complete(struct work_struct *work)
3242 {
3243 	struct cifs_writedata *wdata = container_of(work,
3244 					struct cifs_writedata, work);
3245 	struct inode *inode = d_inode(wdata->cfile->dentry);
3246 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3247 
3248 	spin_lock(&inode->i_lock);
3249 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3250 	if (cifsi->netfs.remote_i_size > inode->i_size)
3251 		i_size_write(inode, cifsi->netfs.remote_i_size);
3252 	spin_unlock(&inode->i_lock);
3253 
3254 	complete(&wdata->done);
3255 	collect_uncached_write_data(wdata->ctx);
3256 	/* the below call can possibly free the last ref to aio ctx */
3257 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3258 }
3259 
3260 static int
3261 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3262 	struct cifs_aio_ctx *ctx)
3263 {
3264 	unsigned int wsize;
3265 	struct cifs_credits credits;
3266 	int rc;
3267 	struct TCP_Server_Info *server = wdata->server;
3268 
3269 	do {
3270 		if (wdata->cfile->invalidHandle) {
3271 			rc = cifs_reopen_file(wdata->cfile, false);
3272 			if (rc == -EAGAIN)
3273 				continue;
3274 			else if (rc)
3275 				break;
3276 		}
3277 
3278 
3279 		/*
3280 		 * Wait for credits to resend this wdata.
3281 		 * Note: we are attempting to resend the whole wdata not in
3282 		 * segments
3283 		 */
3284 		do {
3285 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3286 						&wsize, &credits);
3287 			if (rc)
3288 				goto fail;
3289 
3290 			if (wsize < wdata->bytes) {
3291 				add_credits_and_wake_if(server, &credits, 0);
3292 				msleep(1000);
3293 			}
3294 		} while (wsize < wdata->bytes);
3295 		wdata->credits = credits;
3296 
3297 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3298 
3299 		if (!rc) {
3300 			if (wdata->cfile->invalidHandle)
3301 				rc = -EAGAIN;
3302 			else {
3303 				wdata->replay = true;
3304 #ifdef CONFIG_CIFS_SMB_DIRECT
3305 				if (wdata->mr) {
3306 					wdata->mr->need_invalidate = true;
3307 					smbd_deregister_mr(wdata->mr);
3308 					wdata->mr = NULL;
3309 				}
3310 #endif
3311 				rc = server->ops->async_writev(wdata,
3312 					cifs_uncached_writedata_release);
3313 			}
3314 		}
3315 
3316 		/* If the write was successfully sent, we are done */
3317 		if (!rc) {
3318 			list_add_tail(&wdata->list, wdata_list);
3319 			return 0;
3320 		}
3321 
3322 		/* Roll back credits and retry if needed */
3323 		add_credits_and_wake_if(server, &wdata->credits, 0);
3324 	} while (rc == -EAGAIN);
3325 
3326 fail:
3327 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3328 	return rc;
3329 }
3330 
3331 /*
3332  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3333  * size and maximum number of segments.
3334  */
3335 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3336 				     size_t max_segs, unsigned int *_nsegs)
3337 {
3338 	const struct bio_vec *bvecs = iter->bvec;
3339 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3340 	size_t len, span = 0, n = iter->count;
3341 	size_t skip = iter->iov_offset;
3342 
3343 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3344 		return 0;
3345 
3346 	while (n && ix < nbv && skip) {
3347 		len = bvecs[ix].bv_len;
3348 		if (skip < len)
3349 			break;
3350 		skip -= len;
3351 		n -= len;
3352 		ix++;
3353 	}
3354 
3355 	while (n && ix < nbv) {
3356 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3357 		span += len;
3358 		max_size -= len;
3359 		nsegs++;
3360 		ix++;
3361 		if (max_size == 0 || nsegs >= max_segs)
3362 			break;
3363 		skip = 0;
3364 		n -= len;
3365 	}
3366 
3367 	*_nsegs = nsegs;
3368 	return span;
3369 }
3370 
3371 static int
3372 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3373 		     struct cifsFileInfo *open_file,
3374 		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3375 		     struct cifs_aio_ctx *ctx)
3376 {
3377 	int rc = 0;
3378 	size_t cur_len, max_len;
3379 	struct cifs_writedata *wdata;
3380 	pid_t pid;
3381 	struct TCP_Server_Info *server;
3382 	unsigned int xid, max_segs = INT_MAX;
3383 
3384 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3385 		pid = open_file->pid;
3386 	else
3387 		pid = current->tgid;
3388 
3389 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3390 	xid = get_xid();
3391 
3392 #ifdef CONFIG_CIFS_SMB_DIRECT
3393 	if (server->smbd_conn)
3394 		max_segs = server->smbd_conn->max_frmr_depth;
3395 #endif
3396 
3397 	do {
3398 		struct cifs_credits credits_on_stack;
3399 		struct cifs_credits *credits = &credits_on_stack;
3400 		unsigned int wsize, nsegs = 0;
3401 
3402 		if (signal_pending(current)) {
3403 			rc = -EINTR;
3404 			break;
3405 		}
3406 
3407 		if (open_file->invalidHandle) {
3408 			rc = cifs_reopen_file(open_file, false);
3409 			if (rc == -EAGAIN)
3410 				continue;
3411 			else if (rc)
3412 				break;
3413 		}
3414 
3415 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3416 						   &wsize, credits);
3417 		if (rc)
3418 			break;
3419 
3420 		max_len = min_t(const size_t, len, wsize);
3421 		if (!max_len) {
3422 			rc = -EAGAIN;
3423 			add_credits_and_wake_if(server, credits, 0);
3424 			break;
3425 		}
3426 
3427 		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3428 		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3429 			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3430 		if (cur_len == 0) {
3431 			rc = -EIO;
3432 			add_credits_and_wake_if(server, credits, 0);
3433 			break;
3434 		}
3435 
3436 		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3437 		if (!wdata) {
3438 			rc = -ENOMEM;
3439 			add_credits_and_wake_if(server, credits, 0);
3440 			break;
3441 		}
3442 
3443 		wdata->sync_mode = WB_SYNC_ALL;
3444 		wdata->offset	= (__u64)fpos;
3445 		wdata->cfile	= cifsFileInfo_get(open_file);
3446 		wdata->server	= server;
3447 		wdata->pid	= pid;
3448 		wdata->bytes	= cur_len;
3449 		wdata->credits	= credits_on_stack;
3450 		wdata->iter	= *from;
3451 		wdata->ctx	= ctx;
3452 		kref_get(&ctx->refcount);
3453 
3454 		iov_iter_truncate(&wdata->iter, cur_len);
3455 
3456 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3457 
3458 		if (!rc) {
3459 			if (wdata->cfile->invalidHandle)
3460 				rc = -EAGAIN;
3461 			else
3462 				rc = server->ops->async_writev(wdata,
3463 					cifs_uncached_writedata_release);
3464 		}
3465 
3466 		if (rc) {
3467 			add_credits_and_wake_if(server, &wdata->credits, 0);
3468 			kref_put(&wdata->refcount,
3469 				 cifs_uncached_writedata_release);
3470 			if (rc == -EAGAIN)
3471 				continue;
3472 			break;
3473 		}
3474 
3475 		list_add_tail(&wdata->list, wdata_list);
3476 		iov_iter_advance(from, cur_len);
3477 		fpos += cur_len;
3478 		len -= cur_len;
3479 	} while (len > 0);
3480 
3481 	free_xid(xid);
3482 	return rc;
3483 }
3484 
3485 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3486 {
3487 	struct cifs_writedata *wdata, *tmp;
3488 	struct cifs_tcon *tcon;
3489 	struct cifs_sb_info *cifs_sb;
3490 	struct dentry *dentry = ctx->cfile->dentry;
3491 	ssize_t rc;
3492 
3493 	tcon = tlink_tcon(ctx->cfile->tlink);
3494 	cifs_sb = CIFS_SB(dentry->d_sb);
3495 
3496 	mutex_lock(&ctx->aio_mutex);
3497 
3498 	if (list_empty(&ctx->list)) {
3499 		mutex_unlock(&ctx->aio_mutex);
3500 		return;
3501 	}
3502 
3503 	rc = ctx->rc;
3504 	/*
3505 	 * Wait for and collect replies for any successful sends in order of
3506 	 * increasing offset. Once an error is hit, then return without waiting
3507 	 * for any more replies.
3508 	 */
3509 restart_loop:
3510 	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3511 		if (!rc) {
3512 			if (!try_wait_for_completion(&wdata->done)) {
3513 				mutex_unlock(&ctx->aio_mutex);
3514 				return;
3515 			}
3516 
3517 			if (wdata->result)
3518 				rc = wdata->result;
3519 			else
3520 				ctx->total_len += wdata->bytes;
3521 
3522 			/* resend call if it's a retryable error */
3523 			if (rc == -EAGAIN) {
3524 				struct list_head tmp_list;
3525 				struct iov_iter tmp_from = ctx->iter;
3526 
3527 				INIT_LIST_HEAD(&tmp_list);
3528 				list_del_init(&wdata->list);
3529 
3530 				if (ctx->direct_io)
3531 					rc = cifs_resend_wdata(
3532 						wdata, &tmp_list, ctx);
3533 				else {
3534 					iov_iter_advance(&tmp_from,
3535 						 wdata->offset - ctx->pos);
3536 
3537 					rc = cifs_write_from_iter(wdata->offset,
3538 						wdata->bytes, &tmp_from,
3539 						ctx->cfile, cifs_sb, &tmp_list,
3540 						ctx);
3541 
3542 					kref_put(&wdata->refcount,
3543 						cifs_uncached_writedata_release);
3544 				}
3545 
3546 				list_splice(&tmp_list, &ctx->list);
3547 				goto restart_loop;
3548 			}
3549 		}
3550 		list_del_init(&wdata->list);
3551 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3552 	}
3553 
3554 	cifs_stats_bytes_written(tcon, ctx->total_len);
3555 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3556 
3557 	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3558 
3559 	mutex_unlock(&ctx->aio_mutex);
3560 
3561 	if (ctx->iocb && ctx->iocb->ki_complete)
3562 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3563 	else
3564 		complete(&ctx->done);
3565 }
3566 
3567 static ssize_t __cifs_writev(
3568 	struct kiocb *iocb, struct iov_iter *from, bool direct)
3569 {
3570 	struct file *file = iocb->ki_filp;
3571 	ssize_t total_written = 0;
3572 	struct cifsFileInfo *cfile;
3573 	struct cifs_tcon *tcon;
3574 	struct cifs_sb_info *cifs_sb;
3575 	struct cifs_aio_ctx *ctx;
3576 	int rc;
3577 
3578 	rc = generic_write_checks(iocb, from);
3579 	if (rc <= 0)
3580 		return rc;
3581 
3582 	cifs_sb = CIFS_FILE_SB(file);
3583 	cfile = file->private_data;
3584 	tcon = tlink_tcon(cfile->tlink);
3585 
3586 	if (!tcon->ses->server->ops->async_writev)
3587 		return -ENOSYS;
3588 
3589 	ctx = cifs_aio_ctx_alloc();
3590 	if (!ctx)
3591 		return -ENOMEM;
3592 
3593 	ctx->cfile = cifsFileInfo_get(cfile);
3594 
3595 	if (!is_sync_kiocb(iocb))
3596 		ctx->iocb = iocb;
3597 
3598 	ctx->pos = iocb->ki_pos;
3599 	ctx->direct_io = direct;
3600 	ctx->nr_pinned_pages = 0;
3601 
3602 	if (user_backed_iter(from)) {
3603 		/*
3604 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3605 		 * they contain references to the calling process's virtual
3606 		 * memory layout which won't be available in an async worker
3607 		 * thread.  This also takes a pin on every folio involved.
3608 		 */
3609 		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3610 					     &ctx->iter, 0);
3611 		if (rc < 0) {
3612 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3613 			return rc;
3614 		}
3615 
3616 		ctx->nr_pinned_pages = rc;
3617 		ctx->bv = (void *)ctx->iter.bvec;
3618 		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3619 	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3620 		   !is_sync_kiocb(iocb)) {
3621 		/*
3622 		 * If the op is asynchronous, we need to copy the list attached
3623 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3624 		 * will be pinned by the caller; in any case, we may or may not
3625 		 * be able to pin the pages, so we don't try.
3626 		 */
3627 		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3628 		if (!ctx->bv) {
3629 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3630 			return -ENOMEM;
3631 		}
3632 	} else {
3633 		/*
3634 		 * Otherwise, we just pass the iterator down as-is and rely on
3635 		 * the caller to make sure the pages referred to by the
3636 		 * iterator don't evaporate.
3637 		 */
3638 		ctx->iter = *from;
3639 	}
3640 
3641 	ctx->len = iov_iter_count(&ctx->iter);
3642 
3643 	/* grab a lock here due to read response handlers can access ctx */
3644 	mutex_lock(&ctx->aio_mutex);
3645 
3646 	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3647 				  cfile, cifs_sb, &ctx->list, ctx);
3648 
3649 	/*
3650 	 * If at least one write was successfully sent, then discard any rc
3651 	 * value from the later writes. If the other write succeeds, then
3652 	 * we'll end up returning whatever was written. If it fails, then
3653 	 * we'll get a new rc value from that.
3654 	 */
3655 	if (!list_empty(&ctx->list))
3656 		rc = 0;
3657 
3658 	mutex_unlock(&ctx->aio_mutex);
3659 
3660 	if (rc) {
3661 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3662 		return rc;
3663 	}
3664 
3665 	if (!is_sync_kiocb(iocb)) {
3666 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3667 		return -EIOCBQUEUED;
3668 	}
3669 
3670 	rc = wait_for_completion_killable(&ctx->done);
3671 	if (rc) {
3672 		mutex_lock(&ctx->aio_mutex);
3673 		ctx->rc = rc = -EINTR;
3674 		total_written = ctx->total_len;
3675 		mutex_unlock(&ctx->aio_mutex);
3676 	} else {
3677 		rc = ctx->rc;
3678 		total_written = ctx->total_len;
3679 	}
3680 
3681 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3682 
3683 	if (unlikely(!total_written))
3684 		return rc;
3685 
3686 	iocb->ki_pos += total_written;
3687 	return total_written;
3688 }
3689 
3690 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3691 {
3692 	struct file *file = iocb->ki_filp;
3693 
3694 	cifs_revalidate_mapping(file->f_inode);
3695 	return __cifs_writev(iocb, from, true);
3696 }
3697 
3698 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3699 {
3700 	return __cifs_writev(iocb, from, false);
3701 }
3702 
3703 static ssize_t
3704 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3705 {
3706 	struct file *file = iocb->ki_filp;
3707 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3708 	struct inode *inode = file->f_mapping->host;
3709 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3710 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3711 	ssize_t rc;
3712 
3713 	inode_lock(inode);
3714 	/*
3715 	 * We need to hold the sem to be sure nobody modifies lock list
3716 	 * with a brlock that prevents writing.
3717 	 */
3718 	down_read(&cinode->lock_sem);
3719 
3720 	rc = generic_write_checks(iocb, from);
3721 	if (rc <= 0)
3722 		goto out;
3723 
3724 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3725 				     server->vals->exclusive_lock_type, 0,
3726 				     NULL, CIFS_WRITE_OP))
3727 		rc = __generic_file_write_iter(iocb, from);
3728 	else
3729 		rc = -EACCES;
3730 out:
3731 	up_read(&cinode->lock_sem);
3732 	inode_unlock(inode);
3733 
3734 	if (rc > 0)
3735 		rc = generic_write_sync(iocb, rc);
3736 	return rc;
3737 }
3738 
3739 ssize_t
3740 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3741 {
3742 	struct inode *inode = file_inode(iocb->ki_filp);
3743 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3744 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3745 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3746 						iocb->ki_filp->private_data;
3747 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3748 	ssize_t written;
3749 
3750 	written = cifs_get_writer(cinode);
3751 	if (written)
3752 		return written;
3753 
3754 	if (CIFS_CACHE_WRITE(cinode)) {
3755 		if (cap_unix(tcon->ses) &&
3756 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3757 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3758 			written = generic_file_write_iter(iocb, from);
3759 			goto out;
3760 		}
3761 		written = cifs_writev(iocb, from);
3762 		goto out;
3763 	}
3764 	/*
3765 	 * For non-oplocked files in strict cache mode we need to write the data
3766 	 * to the server exactly from the pos to pos+len-1 rather than flush all
3767 	 * affected pages because it may cause a error with mandatory locks on
3768 	 * these pages but not on the region from pos to ppos+len-1.
3769 	 */
3770 	written = cifs_user_writev(iocb, from);
3771 	if (CIFS_CACHE_READ(cinode)) {
3772 		/*
3773 		 * We have read level caching and we have just sent a write
3774 		 * request to the server thus making data in the cache stale.
3775 		 * Zap the cache and set oplock/lease level to NONE to avoid
3776 		 * reading stale data from the cache. All subsequent read
3777 		 * operations will read new data from the server.
3778 		 */
3779 		cifs_zap_mapping(inode);
3780 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3781 			 inode);
3782 		cinode->oplock = 0;
3783 	}
3784 out:
3785 	cifs_put_writer(cinode);
3786 	return written;
3787 }
3788 
3789 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3790 {
3791 	struct cifs_readdata *rdata;
3792 
3793 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3794 	if (rdata) {
3795 		kref_init(&rdata->refcount);
3796 		INIT_LIST_HEAD(&rdata->list);
3797 		init_completion(&rdata->done);
3798 		INIT_WORK(&rdata->work, complete);
3799 	}
3800 
3801 	return rdata;
3802 }
3803 
3804 void
3805 cifs_readdata_release(struct kref *refcount)
3806 {
3807 	struct cifs_readdata *rdata = container_of(refcount,
3808 					struct cifs_readdata, refcount);
3809 
3810 	if (rdata->ctx)
3811 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3812 #ifdef CONFIG_CIFS_SMB_DIRECT
3813 	if (rdata->mr) {
3814 		smbd_deregister_mr(rdata->mr);
3815 		rdata->mr = NULL;
3816 	}
3817 #endif
3818 	if (rdata->cfile)
3819 		cifsFileInfo_put(rdata->cfile);
3820 
3821 	kfree(rdata);
3822 }
3823 
3824 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3825 
3826 static void
3827 cifs_uncached_readv_complete(struct work_struct *work)
3828 {
3829 	struct cifs_readdata *rdata = container_of(work,
3830 						struct cifs_readdata, work);
3831 
3832 	complete(&rdata->done);
3833 	collect_uncached_read_data(rdata->ctx);
3834 	/* the below call can possibly free the last ref to aio ctx */
3835 	kref_put(&rdata->refcount, cifs_readdata_release);
3836 }
3837 
3838 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3839 			struct list_head *rdata_list,
3840 			struct cifs_aio_ctx *ctx)
3841 {
3842 	unsigned int rsize;
3843 	struct cifs_credits credits;
3844 	int rc;
3845 	struct TCP_Server_Info *server;
3846 
3847 	/* XXX: should we pick a new channel here? */
3848 	server = rdata->server;
3849 
3850 	do {
3851 		if (rdata->cfile->invalidHandle) {
3852 			rc = cifs_reopen_file(rdata->cfile, true);
3853 			if (rc == -EAGAIN)
3854 				continue;
3855 			else if (rc)
3856 				break;
3857 		}
3858 
3859 		/*
3860 		 * Wait for credits to resend this rdata.
3861 		 * Note: we are attempting to resend the whole rdata not in
3862 		 * segments
3863 		 */
3864 		do {
3865 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3866 						&rsize, &credits);
3867 
3868 			if (rc)
3869 				goto fail;
3870 
3871 			if (rsize < rdata->bytes) {
3872 				add_credits_and_wake_if(server, &credits, 0);
3873 				msleep(1000);
3874 			}
3875 		} while (rsize < rdata->bytes);
3876 		rdata->credits = credits;
3877 
3878 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3879 		if (!rc) {
3880 			if (rdata->cfile->invalidHandle)
3881 				rc = -EAGAIN;
3882 			else {
3883 #ifdef CONFIG_CIFS_SMB_DIRECT
3884 				if (rdata->mr) {
3885 					rdata->mr->need_invalidate = true;
3886 					smbd_deregister_mr(rdata->mr);
3887 					rdata->mr = NULL;
3888 				}
3889 #endif
3890 				rc = server->ops->async_readv(rdata);
3891 			}
3892 		}
3893 
3894 		/* If the read was successfully sent, we are done */
3895 		if (!rc) {
3896 			/* Add to aio pending list */
3897 			list_add_tail(&rdata->list, rdata_list);
3898 			return 0;
3899 		}
3900 
3901 		/* Roll back credits and retry if needed */
3902 		add_credits_and_wake_if(server, &rdata->credits, 0);
3903 	} while (rc == -EAGAIN);
3904 
3905 fail:
3906 	kref_put(&rdata->refcount, cifs_readdata_release);
3907 	return rc;
3908 }
3909 
3910 static int
3911 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
3912 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3913 		     struct cifs_aio_ctx *ctx)
3914 {
3915 	struct cifs_readdata *rdata;
3916 	unsigned int rsize, nsegs, max_segs = INT_MAX;
3917 	struct cifs_credits credits_on_stack;
3918 	struct cifs_credits *credits = &credits_on_stack;
3919 	size_t cur_len, max_len;
3920 	int rc;
3921 	pid_t pid;
3922 	struct TCP_Server_Info *server;
3923 
3924 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3925 
3926 #ifdef CONFIG_CIFS_SMB_DIRECT
3927 	if (server->smbd_conn)
3928 		max_segs = server->smbd_conn->max_frmr_depth;
3929 #endif
3930 
3931 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3932 		pid = open_file->pid;
3933 	else
3934 		pid = current->tgid;
3935 
3936 	do {
3937 		if (open_file->invalidHandle) {
3938 			rc = cifs_reopen_file(open_file, true);
3939 			if (rc == -EAGAIN)
3940 				continue;
3941 			else if (rc)
3942 				break;
3943 		}
3944 
3945 		if (cifs_sb->ctx->rsize == 0)
3946 			cifs_sb->ctx->rsize =
3947 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3948 							     cifs_sb->ctx);
3949 
3950 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3951 						   &rsize, credits);
3952 		if (rc)
3953 			break;
3954 
3955 		max_len = min_t(size_t, len, rsize);
3956 
3957 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
3958 						 max_segs, &nsegs);
3959 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3960 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
3961 		if (cur_len == 0) {
3962 			rc = -EIO;
3963 			add_credits_and_wake_if(server, credits, 0);
3964 			break;
3965 		}
3966 
3967 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
3968 		if (!rdata) {
3969 			add_credits_and_wake_if(server, credits, 0);
3970 			rc = -ENOMEM;
3971 			break;
3972 		}
3973 
3974 		rdata->server	= server;
3975 		rdata->cfile	= cifsFileInfo_get(open_file);
3976 		rdata->offset	= fpos;
3977 		rdata->bytes	= cur_len;
3978 		rdata->pid	= pid;
3979 		rdata->credits	= credits_on_stack;
3980 		rdata->ctx	= ctx;
3981 		kref_get(&ctx->refcount);
3982 
3983 		rdata->iter	= ctx->iter;
3984 		iov_iter_truncate(&rdata->iter, cur_len);
3985 
3986 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3987 
3988 		if (!rc) {
3989 			if (rdata->cfile->invalidHandle)
3990 				rc = -EAGAIN;
3991 			else
3992 				rc = server->ops->async_readv(rdata);
3993 		}
3994 
3995 		if (rc) {
3996 			add_credits_and_wake_if(server, &rdata->credits, 0);
3997 			kref_put(&rdata->refcount, cifs_readdata_release);
3998 			if (rc == -EAGAIN)
3999 				continue;
4000 			break;
4001 		}
4002 
4003 		list_add_tail(&rdata->list, rdata_list);
4004 		iov_iter_advance(&ctx->iter, cur_len);
4005 		fpos += cur_len;
4006 		len -= cur_len;
4007 	} while (len > 0);
4008 
4009 	return rc;
4010 }
4011 
4012 static void
4013 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4014 {
4015 	struct cifs_readdata *rdata, *tmp;
4016 	struct cifs_sb_info *cifs_sb;
4017 	int rc;
4018 
4019 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4020 
4021 	mutex_lock(&ctx->aio_mutex);
4022 
4023 	if (list_empty(&ctx->list)) {
4024 		mutex_unlock(&ctx->aio_mutex);
4025 		return;
4026 	}
4027 
4028 	rc = ctx->rc;
4029 	/* the loop below should proceed in the order of increasing offsets */
4030 again:
4031 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4032 		if (!rc) {
4033 			if (!try_wait_for_completion(&rdata->done)) {
4034 				mutex_unlock(&ctx->aio_mutex);
4035 				return;
4036 			}
4037 
4038 			if (rdata->result == -EAGAIN) {
4039 				/* resend call if it's a retryable error */
4040 				struct list_head tmp_list;
4041 				unsigned int got_bytes = rdata->got_bytes;
4042 
4043 				list_del_init(&rdata->list);
4044 				INIT_LIST_HEAD(&tmp_list);
4045 
4046 				if (ctx->direct_io) {
4047 					/*
4048 					 * Re-use rdata as this is a
4049 					 * direct I/O
4050 					 */
4051 					rc = cifs_resend_rdata(
4052 						rdata,
4053 						&tmp_list, ctx);
4054 				} else {
4055 					rc = cifs_send_async_read(
4056 						rdata->offset + got_bytes,
4057 						rdata->bytes - got_bytes,
4058 						rdata->cfile, cifs_sb,
4059 						&tmp_list, ctx);
4060 
4061 					kref_put(&rdata->refcount,
4062 						cifs_readdata_release);
4063 				}
4064 
4065 				list_splice(&tmp_list, &ctx->list);
4066 
4067 				goto again;
4068 			} else if (rdata->result)
4069 				rc = rdata->result;
4070 
4071 			/* if there was a short read -- discard anything left */
4072 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4073 				rc = -ENODATA;
4074 
4075 			ctx->total_len += rdata->got_bytes;
4076 		}
4077 		list_del_init(&rdata->list);
4078 		kref_put(&rdata->refcount, cifs_readdata_release);
4079 	}
4080 
4081 	/* mask nodata case */
4082 	if (rc == -ENODATA)
4083 		rc = 0;
4084 
4085 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4086 
4087 	mutex_unlock(&ctx->aio_mutex);
4088 
4089 	if (ctx->iocb && ctx->iocb->ki_complete)
4090 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4091 	else
4092 		complete(&ctx->done);
4093 }
4094 
4095 static ssize_t __cifs_readv(
4096 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4097 {
4098 	size_t len;
4099 	struct file *file = iocb->ki_filp;
4100 	struct cifs_sb_info *cifs_sb;
4101 	struct cifsFileInfo *cfile;
4102 	struct cifs_tcon *tcon;
4103 	ssize_t rc, total_read = 0;
4104 	loff_t offset = iocb->ki_pos;
4105 	struct cifs_aio_ctx *ctx;
4106 
4107 	len = iov_iter_count(to);
4108 	if (!len)
4109 		return 0;
4110 
4111 	cifs_sb = CIFS_FILE_SB(file);
4112 	cfile = file->private_data;
4113 	tcon = tlink_tcon(cfile->tlink);
4114 
4115 	if (!tcon->ses->server->ops->async_readv)
4116 		return -ENOSYS;
4117 
4118 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4119 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4120 
4121 	ctx = cifs_aio_ctx_alloc();
4122 	if (!ctx)
4123 		return -ENOMEM;
4124 
4125 	ctx->pos	= offset;
4126 	ctx->direct_io	= direct;
4127 	ctx->len	= len;
4128 	ctx->cfile	= cifsFileInfo_get(cfile);
4129 	ctx->nr_pinned_pages = 0;
4130 
4131 	if (!is_sync_kiocb(iocb))
4132 		ctx->iocb = iocb;
4133 
4134 	if (user_backed_iter(to)) {
4135 		/*
4136 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4137 		 * they contain references to the calling process's virtual
4138 		 * memory layout which won't be available in an async worker
4139 		 * thread.  This also takes a pin on every folio involved.
4140 		 */
4141 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4142 					     &ctx->iter, 0);
4143 		if (rc < 0) {
4144 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4145 			return rc;
4146 		}
4147 
4148 		ctx->nr_pinned_pages = rc;
4149 		ctx->bv = (void *)ctx->iter.bvec;
4150 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4151 		ctx->should_dirty = true;
4152 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4153 		   !is_sync_kiocb(iocb)) {
4154 		/*
4155 		 * If the op is asynchronous, we need to copy the list attached
4156 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4157 		 * will be retained by the caller; in any case, we may or may
4158 		 * not be able to pin the pages, so we don't try.
4159 		 */
4160 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4161 		if (!ctx->bv) {
4162 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4163 			return -ENOMEM;
4164 		}
4165 	} else {
4166 		/*
4167 		 * Otherwise, we just pass the iterator down as-is and rely on
4168 		 * the caller to make sure the pages referred to by the
4169 		 * iterator don't evaporate.
4170 		 */
4171 		ctx->iter = *to;
4172 	}
4173 
4174 	if (direct) {
4175 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4176 						  offset, offset + len - 1);
4177 		if (rc) {
4178 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4179 			return -EAGAIN;
4180 		}
4181 	}
4182 
4183 	/* grab a lock here due to read response handlers can access ctx */
4184 	mutex_lock(&ctx->aio_mutex);
4185 
4186 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4187 
4188 	/* if at least one read request send succeeded, then reset rc */
4189 	if (!list_empty(&ctx->list))
4190 		rc = 0;
4191 
4192 	mutex_unlock(&ctx->aio_mutex);
4193 
4194 	if (rc) {
4195 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4196 		return rc;
4197 	}
4198 
4199 	if (!is_sync_kiocb(iocb)) {
4200 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4201 		return -EIOCBQUEUED;
4202 	}
4203 
4204 	rc = wait_for_completion_killable(&ctx->done);
4205 	if (rc) {
4206 		mutex_lock(&ctx->aio_mutex);
4207 		ctx->rc = rc = -EINTR;
4208 		total_read = ctx->total_len;
4209 		mutex_unlock(&ctx->aio_mutex);
4210 	} else {
4211 		rc = ctx->rc;
4212 		total_read = ctx->total_len;
4213 	}
4214 
4215 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4216 
4217 	if (total_read) {
4218 		iocb->ki_pos += total_read;
4219 		return total_read;
4220 	}
4221 	return rc;
4222 }
4223 
4224 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4225 {
4226 	return __cifs_readv(iocb, to, true);
4227 }
4228 
4229 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4230 {
4231 	return __cifs_readv(iocb, to, false);
4232 }
4233 
4234 ssize_t
4235 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4236 {
4237 	struct inode *inode = file_inode(iocb->ki_filp);
4238 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4239 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4240 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4241 						iocb->ki_filp->private_data;
4242 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4243 	int rc = -EACCES;
4244 
4245 	/*
4246 	 * In strict cache mode we need to read from the server all the time
4247 	 * if we don't have level II oplock because the server can delay mtime
4248 	 * change - so we can't make a decision about inode invalidating.
4249 	 * And we can also fail with pagereading if there are mandatory locks
4250 	 * on pages affected by this read but not on the region from pos to
4251 	 * pos+len-1.
4252 	 */
4253 	if (!CIFS_CACHE_READ(cinode))
4254 		return cifs_user_readv(iocb, to);
4255 
4256 	if (cap_unix(tcon->ses) &&
4257 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4258 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4259 		return generic_file_read_iter(iocb, to);
4260 
4261 	/*
4262 	 * We need to hold the sem to be sure nobody modifies lock list
4263 	 * with a brlock that prevents reading.
4264 	 */
4265 	down_read(&cinode->lock_sem);
4266 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4267 				     tcon->ses->server->vals->shared_lock_type,
4268 				     0, NULL, CIFS_READ_OP))
4269 		rc = generic_file_read_iter(iocb, to);
4270 	up_read(&cinode->lock_sem);
4271 	return rc;
4272 }
4273 
4274 static ssize_t
4275 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4276 {
4277 	int rc = -EACCES;
4278 	unsigned int bytes_read = 0;
4279 	unsigned int total_read;
4280 	unsigned int current_read_size;
4281 	unsigned int rsize;
4282 	struct cifs_sb_info *cifs_sb;
4283 	struct cifs_tcon *tcon;
4284 	struct TCP_Server_Info *server;
4285 	unsigned int xid;
4286 	char *cur_offset;
4287 	struct cifsFileInfo *open_file;
4288 	struct cifs_io_parms io_parms = {0};
4289 	int buf_type = CIFS_NO_BUFFER;
4290 	__u32 pid;
4291 
4292 	xid = get_xid();
4293 	cifs_sb = CIFS_FILE_SB(file);
4294 
4295 	/* FIXME: set up handlers for larger reads and/or convert to async */
4296 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4297 
4298 	if (file->private_data == NULL) {
4299 		rc = -EBADF;
4300 		free_xid(xid);
4301 		return rc;
4302 	}
4303 	open_file = file->private_data;
4304 	tcon = tlink_tcon(open_file->tlink);
4305 	server = cifs_pick_channel(tcon->ses);
4306 
4307 	if (!server->ops->sync_read) {
4308 		free_xid(xid);
4309 		return -ENOSYS;
4310 	}
4311 
4312 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4313 		pid = open_file->pid;
4314 	else
4315 		pid = current->tgid;
4316 
4317 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4318 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4319 
4320 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4321 	     total_read += bytes_read, cur_offset += bytes_read) {
4322 		do {
4323 			current_read_size = min_t(uint, read_size - total_read,
4324 						  rsize);
4325 			/*
4326 			 * For windows me and 9x we do not want to request more
4327 			 * than it negotiated since it will refuse the read
4328 			 * then.
4329 			 */
4330 			if (!(tcon->ses->capabilities &
4331 				tcon->ses->server->vals->cap_large_files)) {
4332 				current_read_size = min_t(uint,
4333 					current_read_size, CIFSMaxBufSize);
4334 			}
4335 			if (open_file->invalidHandle) {
4336 				rc = cifs_reopen_file(open_file, true);
4337 				if (rc != 0)
4338 					break;
4339 			}
4340 			io_parms.pid = pid;
4341 			io_parms.tcon = tcon;
4342 			io_parms.offset = *offset;
4343 			io_parms.length = current_read_size;
4344 			io_parms.server = server;
4345 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4346 						    &bytes_read, &cur_offset,
4347 						    &buf_type);
4348 		} while (rc == -EAGAIN);
4349 
4350 		if (rc || (bytes_read == 0)) {
4351 			if (total_read) {
4352 				break;
4353 			} else {
4354 				free_xid(xid);
4355 				return rc;
4356 			}
4357 		} else {
4358 			cifs_stats_bytes_read(tcon, total_read);
4359 			*offset += bytes_read;
4360 		}
4361 	}
4362 	free_xid(xid);
4363 	return total_read;
4364 }
4365 
4366 /*
4367  * If the page is mmap'ed into a process' page tables, then we need to make
4368  * sure that it doesn't change while being written back.
4369  */
4370 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4371 {
4372 	struct folio *folio = page_folio(vmf->page);
4373 
4374 	/* Wait for the folio to be written to the cache before we allow it to
4375 	 * be modified.  We then assume the entire folio will need writing back.
4376 	 */
4377 #ifdef CONFIG_CIFS_FSCACHE
4378 	if (folio_test_fscache(folio) &&
4379 	    folio_wait_fscache_killable(folio) < 0)
4380 		return VM_FAULT_RETRY;
4381 #endif
4382 
4383 	folio_wait_writeback(folio);
4384 
4385 	if (folio_lock_killable(folio) < 0)
4386 		return VM_FAULT_RETRY;
4387 	return VM_FAULT_LOCKED;
4388 }
4389 
4390 static const struct vm_operations_struct cifs_file_vm_ops = {
4391 	.fault = filemap_fault,
4392 	.map_pages = filemap_map_pages,
4393 	.page_mkwrite = cifs_page_mkwrite,
4394 };
4395 
4396 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4397 {
4398 	int xid, rc = 0;
4399 	struct inode *inode = file_inode(file);
4400 
4401 	xid = get_xid();
4402 
4403 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4404 		rc = cifs_zap_mapping(inode);
4405 	if (!rc)
4406 		rc = generic_file_mmap(file, vma);
4407 	if (!rc)
4408 		vma->vm_ops = &cifs_file_vm_ops;
4409 
4410 	free_xid(xid);
4411 	return rc;
4412 }
4413 
4414 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4415 {
4416 	int rc, xid;
4417 
4418 	xid = get_xid();
4419 
4420 	rc = cifs_revalidate_file(file);
4421 	if (rc)
4422 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4423 			 rc);
4424 	if (!rc)
4425 		rc = generic_file_mmap(file, vma);
4426 	if (!rc)
4427 		vma->vm_ops = &cifs_file_vm_ops;
4428 
4429 	free_xid(xid);
4430 	return rc;
4431 }
4432 
4433 /*
4434  * Unlock a bunch of folios in the pagecache.
4435  */
4436 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4437 {
4438 	struct folio *folio;
4439 	XA_STATE(xas, &mapping->i_pages, first);
4440 
4441 	rcu_read_lock();
4442 	xas_for_each(&xas, folio, last) {
4443 		folio_unlock(folio);
4444 	}
4445 	rcu_read_unlock();
4446 }
4447 
4448 static void cifs_readahead_complete(struct work_struct *work)
4449 {
4450 	struct cifs_readdata *rdata = container_of(work,
4451 						   struct cifs_readdata, work);
4452 	struct folio *folio;
4453 	pgoff_t last;
4454 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4455 
4456 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4457 
4458 	if (good)
4459 		cifs_readahead_to_fscache(rdata->mapping->host,
4460 					  rdata->offset, rdata->bytes);
4461 
4462 	if (iov_iter_count(&rdata->iter) > 0)
4463 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4464 
4465 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4466 
4467 	rcu_read_lock();
4468 	xas_for_each(&xas, folio, last) {
4469 		if (good) {
4470 			flush_dcache_folio(folio);
4471 			folio_mark_uptodate(folio);
4472 		}
4473 		folio_unlock(folio);
4474 	}
4475 	rcu_read_unlock();
4476 
4477 	kref_put(&rdata->refcount, cifs_readdata_release);
4478 }
4479 
/*
 * Readahead handler.
 *
 * Consumes the folios queued in @ractl and, for each stretch of them,
 * either reads a single folio from fscache (when the cache reports data at
 * the current index) or issues an asynchronous read to the server covering
 * up to rsize bytes.  Server reads complete in cifs_readahead_complete().
 *
 * The loop stops early on any failure (handle reopen, credit wait, rdata
 * allocation, sending the request); nothing is returned to the caller.
 */
static void cifs_readahead(struct readahead_control *ractl)
{
	struct cifsFileInfo *open_file = ractl->file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
	struct TCP_Server_Info *server;
	unsigned int xid, nr_pages, cache_nr_pages = 0;
	unsigned int ra_pages;
	pgoff_t next_cached = ULONG_MAX, ra_index;
	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
	bool check_cache = caching;
	pid_t pid;
	int rc = 0;

	/* Note that readahead_count() lags behind our dequeuing of pages from
	 * the ractl, so we have to keep track for ourselves.
	 */
	ra_pages = readahead_count(ractl);
	ra_index = readahead_index(ractl);

	xid = get_xid();

	/* Choose which pid is attached to the read requests. */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, ractl->file, ractl->mapping, ra_pages);

	/*
	 * Chop the readahead request up into rsize-sized read requests.
	 */
	while ((nr_pages = ra_pages)) {
		unsigned int i, rsize;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
		struct folio *folio;
		pgoff_t fsize;

		/*
		 * Find out if we have anything cached in the range of
		 * interest, and if so, where the next chunk of cached data is.
		 */
		if (caching) {
			if (check_cache) {
				rc = cifs_fscache_query_occupancy(
					ractl->mapping->host, ra_index, nr_pages,
					&next_cached, &cache_nr_pages);
				/* A failed query turns the cache path off. */
				if (rc < 0)
					caching = false;
				check_cache = false;
			}

			if (ra_index == next_cached) {
				/*
				 * TODO: Send a whole batch of pages to be read
				 * by the cache.
				 */
				folio = readahead_folio(ractl);
				fsize = folio_nr_pages(folio);
				ra_pages -= fsize;
				ra_index += fsize;
				if (cifs_readpage_from_fscache(ractl->mapping->host,
							       &folio->page) < 0) {
					/*
					 * TODO: Deal with cache read failure
					 * here, but for the moment, delegate
					 * that to readpage.
					 */
					caching = false;
				}
				folio_unlock(folio);
				next_cached += fsize;
				cache_nr_pages -= fsize;
				/* Cached run used up: query the cache again. */
				if (cache_nr_pages == 0)
					check_cache = true;
				continue;
			}
		}

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc) {
				/* -EAGAIN restarts this iteration. */
				if (rc == -EAGAIN)
					continue;
				break;
			}
		}

		/* An rsize of 0 means it has not been negotiated yet. */
		if (cifs_sb->ctx->rsize == 0)
			cifs_sb->ctx->rsize =
				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
							     cifs_sb->ctx);

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;
		/*
		 * Limit the request to what the granted rsize covers, and do
		 * not run into the next cached chunk if there is one.
		 */
		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
		if (next_cached != ULONG_MAX)
			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(!nr_pages)) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(cifs_readahead_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->offset	= ra_index * PAGE_SIZE;
		rdata->bytes	= nr_pages * PAGE_SIZE;
		rdata->cfile	= cifsFileInfo_get(open_file);
		rdata->server	= server;
		rdata->mapping	= ractl->mapping;
		rdata->pid	= pid;
		rdata->credits	= credits_on_stack;

		/* Dequeue the folios this request covers from the ractl. */
		for (i = 0; i < nr_pages; i++) {
			if (!readahead_folio(ractl))
				WARN_ON(1);
		}
		ra_pages -= nr_pages;
		ra_index += nr_pages;

		/* The read lands directly in the pagecache folios. */
		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
				rdata->offset, rdata->bytes);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			/*
			 * The request was not sent: give the credits back and
			 * unlock the folios that were dequeued for it.
			 */
			add_credits_and_wake_if(server, &rdata->credits, 0);
			cifs_unlock_folios(rdata->mapping,
					   rdata->offset / PAGE_SIZE,
					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* Drop this function's reference on the request. */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	free_xid(xid);
}
4645 
4646 /*
4647  * cifs_readpage_worker must be called with the page pinned
4648  */
4649 static int cifs_readpage_worker(struct file *file, struct page *page,
4650 	loff_t *poffset)
4651 {
4652 	struct inode *inode = file_inode(file);
4653 	struct timespec64 atime, mtime;
4654 	char *read_data;
4655 	int rc;
4656 
4657 	/* Is the page cached? */
4658 	rc = cifs_readpage_from_fscache(inode, page);
4659 	if (rc == 0)
4660 		goto read_complete;
4661 
4662 	read_data = kmap(page);
4663 	/* for reads over a certain size could initiate async read ahead */
4664 
4665 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4666 
4667 	if (rc < 0)
4668 		goto io_error;
4669 	else
4670 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4671 
4672 	/* we do not want atime to be less than mtime, it broke some apps */
4673 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4674 	mtime = inode_get_mtime(inode);
4675 	if (timespec64_compare(&atime, &mtime) < 0)
4676 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4677 
4678 	if (PAGE_SIZE > rc)
4679 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4680 
4681 	flush_dcache_page(page);
4682 	SetPageUptodate(page);
4683 	rc = 0;
4684 
4685 io_error:
4686 	kunmap(page);
4687 
4688 read_complete:
4689 	unlock_page(page);
4690 	return rc;
4691 }
4692 
4693 static int cifs_read_folio(struct file *file, struct folio *folio)
4694 {
4695 	struct page *page = &folio->page;
4696 	loff_t offset = page_file_offset(page);
4697 	int rc = -EACCES;
4698 	unsigned int xid;
4699 
4700 	xid = get_xid();
4701 
4702 	if (file->private_data == NULL) {
4703 		rc = -EBADF;
4704 		free_xid(xid);
4705 		return rc;
4706 	}
4707 
4708 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4709 		 page, (int)offset, (int)offset);
4710 
4711 	rc = cifs_readpage_worker(file, page, &offset);
4712 
4713 	free_xid(xid);
4714 	return rc;
4715 }
4716 
4717 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4718 {
4719 	struct cifsFileInfo *open_file;
4720 
4721 	spin_lock(&cifs_inode->open_file_lock);
4722 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4723 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4724 			spin_unlock(&cifs_inode->open_file_lock);
4725 			return 1;
4726 		}
4727 	}
4728 	spin_unlock(&cifs_inode->open_file_lock);
4729 	return 0;
4730 }
4731 
4732 /* We do not want to update the file size from server for inodes
4733    open for write - to avoid races with writepage extending
4734    the file - in the future we could consider allowing
4735    refreshing the inode only on increases in the file size
4736    but this is tricky to do without racing with writebehind
4737    page caching in the current Linux kernel design */
4738 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4739 {
4740 	if (!cifsInode)
4741 		return true;
4742 
4743 	if (is_inode_writable(cifsInode)) {
4744 		/* This inode is open for write at least once */
4745 		struct cifs_sb_info *cifs_sb;
4746 
4747 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4748 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4749 			/* since no page cache to corrupt on directio
4750 			we can change size safely */
4751 			return true;
4752 		}
4753 
4754 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4755 			return true;
4756 
4757 		return false;
4758 	} else
4759 		return true;
4760 }
4761 
4762 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4763 			loff_t pos, unsigned len,
4764 			struct page **pagep, void **fsdata)
4765 {
4766 	int oncethru = 0;
4767 	pgoff_t index = pos >> PAGE_SHIFT;
4768 	loff_t offset = pos & (PAGE_SIZE - 1);
4769 	loff_t page_start = pos & PAGE_MASK;
4770 	loff_t i_size;
4771 	struct page *page;
4772 	int rc = 0;
4773 
4774 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4775 
4776 start:
4777 	page = grab_cache_page_write_begin(mapping, index);
4778 	if (!page) {
4779 		rc = -ENOMEM;
4780 		goto out;
4781 	}
4782 
4783 	if (PageUptodate(page))
4784 		goto out;
4785 
4786 	/*
4787 	 * If we write a full page it will be up to date, no need to read from
4788 	 * the server. If the write is short, we'll end up doing a sync write
4789 	 * instead.
4790 	 */
4791 	if (len == PAGE_SIZE)
4792 		goto out;
4793 
4794 	/*
4795 	 * optimize away the read when we have an oplock, and we're not
4796 	 * expecting to use any of the data we'd be reading in. That
4797 	 * is, when the page lies beyond the EOF, or straddles the EOF
4798 	 * and the write will cover all of the existing data.
4799 	 */
4800 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4801 		i_size = i_size_read(mapping->host);
4802 		if (page_start >= i_size ||
4803 		    (offset == 0 && (pos + len) >= i_size)) {
4804 			zero_user_segments(page, 0, offset,
4805 					   offset + len,
4806 					   PAGE_SIZE);
4807 			/*
4808 			 * PageChecked means that the parts of the page
4809 			 * to which we're not writing are considered up
4810 			 * to date. Once the data is copied to the
4811 			 * page, it can be set uptodate.
4812 			 */
4813 			SetPageChecked(page);
4814 			goto out;
4815 		}
4816 	}
4817 
4818 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4819 		/*
4820 		 * might as well read a page, it is fast enough. If we get
4821 		 * an error, we don't need to return it. cifs_write_end will
4822 		 * do a sync write instead since PG_uptodate isn't set.
4823 		 */
4824 		cifs_readpage_worker(file, page, &page_start);
4825 		put_page(page);
4826 		oncethru = 1;
4827 		goto start;
4828 	} else {
4829 		/* we could try using another file handle if there is one -
4830 		   but how would we lock it to prevent close of that handle
4831 		   racing with this read? In any case
4832 		   this will be written out by write_end so is fine */
4833 	}
4834 out:
4835 	*pagep = page;
4836 	return rc;
4837 }
4838 
4839 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4840 {
4841 	if (folio_test_private(folio))
4842 		return 0;
4843 	if (folio_test_fscache(folio)) {
4844 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4845 			return false;
4846 		folio_wait_fscache(folio);
4847 	}
4848 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4849 	return true;
4850 }
4851 
4852 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4853 				 size_t length)
4854 {
4855 	folio_wait_fscache(folio);
4856 }
4857 
4858 static int cifs_launder_folio(struct folio *folio)
4859 {
4860 	int rc = 0;
4861 	loff_t range_start = folio_pos(folio);
4862 	loff_t range_end = range_start + folio_size(folio);
4863 	struct writeback_control wbc = {
4864 		.sync_mode = WB_SYNC_ALL,
4865 		.nr_to_write = 0,
4866 		.range_start = range_start,
4867 		.range_end = range_end,
4868 	};
4869 
4870 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4871 
4872 	if (folio_clear_dirty_for_io(folio))
4873 		rc = cifs_writepage_locked(&folio->page, &wbc);
4874 
4875 	folio_wait_fscache(folio);
4876 	return rc;
4877 }
4878 
/*
 * Work item that processes an oplock break for the file embedding @work.
 *
 * It waits for pending writers, downgrades the cached oplock state, writes
 * back (and, when read caching is lost or a purge is requested, waits for
 * and zaps) the pagecache of regular files, pushes byte-range locks to the
 * server, closes deferred handles if handle caching is gone, drops a
 * reference on the file and finally sends the oplock response unless the
 * break was cancelled or the inode has no open files left.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct tcon_link *tlink;
	int rc = 0;
	bool purge_cache = false, oplock_break_cancelled;
	__u64 persistent_fid, volatile_fid;
	__u16 net_fid;

	/* Do not start until the pending-writers flag has been cleared. */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink))
		goto out;
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	/*
	 * NOTE(review): inode has already been dereferenced in the
	 * declarations above, so the NULL test here cannot be what protects
	 * those accesses - confirm whether it is still needed.
	 */
	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		/*
		 * NOTE(review): this result is only logged below, and is
		 * overwritten when the wait branch runs.
		 */
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		/* Still write-caching: skip pushing the locks. */
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * When oplock break is received and there are no active
	 * file handles but cached, then schedule deferred close immediately.
	 * So, new open will not use cached handle.
	 */

	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
		cifs_close_deferred_file(cinode);

	/*
	 * Copy what the response needs out of cfile before the reference on
	 * it is dropped below; cfile is not touched after that.
	 */
	persistent_fid = cfile->fid.persistent_fid;
	volatile_fid = cfile->fid.volatile_fid;
	net_fid = cfile->fid.netfid;
	oplock_break_cancelled = cfile->oplock_break_cancelled;

	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	/*
	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
	 * an acknowledgment to be sent when the file has already been closed.
	 */
	spin_lock(&cinode->open_file_lock);
	/* check list empty since can race with kill_sb calling tree disconnect */
	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
		/* The lock is released before the response is sent. */
		spin_unlock(&cinode->open_file_lock);
		rc = server->ops->oplock_response(tcon, persistent_fid,
						  volatile_fid, net_fid, cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	} else
		spin_unlock(&cinode->open_file_lock);

	cifs_put_tlink(tlink);
out:
	cifs_done_oplock_break(cinode);
}
4967 
4968 /*
4969  * The presence of cifs_direct_io() in the address space ops vector
4970  * allowes open() O_DIRECT flags which would have failed otherwise.
4971  *
4972  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4973  * so this method should never be called.
4974  *
4975  * Direct IO is not yet supported in the cached mode.
4976  */
4977 static ssize_t
4978 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4979 {
4980         /*
4981          * FIXME
4982          * Eventually need to support direct IO for non forcedirectio mounts
4983          */
4984         return -EINVAL;
4985 }
4986 
4987 static int cifs_swap_activate(struct swap_info_struct *sis,
4988 			      struct file *swap_file, sector_t *span)
4989 {
4990 	struct cifsFileInfo *cfile = swap_file->private_data;
4991 	struct inode *inode = swap_file->f_mapping->host;
4992 	unsigned long blocks;
4993 	long long isize;
4994 
4995 	cifs_dbg(FYI, "swap activate\n");
4996 
4997 	if (!swap_file->f_mapping->a_ops->swap_rw)
4998 		/* Cannot support swap */
4999 		return -EINVAL;
5000 
5001 	spin_lock(&inode->i_lock);
5002 	blocks = inode->i_blocks;
5003 	isize = inode->i_size;
5004 	spin_unlock(&inode->i_lock);
5005 	if (blocks*512 < isize) {
5006 		pr_warn("swap activate: swapfile has holes\n");
5007 		return -EINVAL;
5008 	}
5009 	*span = sis->pages;
5010 
5011 	pr_warn_once("Swap support over SMB3 is experimental\n");
5012 
5013 	/*
5014 	 * TODO: consider adding ACL (or documenting how) to prevent other
5015 	 * users (on this or other systems) from reading it
5016 	 */
5017 
5018 
5019 	/* TODO: add sk_set_memalloc(inet) or similar */
5020 
5021 	if (cfile)
5022 		cfile->swapfile = true;
5023 	/*
5024 	 * TODO: Since file already open, we can't open with DENY_ALL here
5025 	 * but we could add call to grab a byte range lock to prevent others
5026 	 * from reading or writing the file
5027 	 */
5028 
5029 	sis->flags |= SWP_FS_OPS;
5030 	return add_swap_extent(sis, 0, sis->max, 0);
5031 }
5032 
5033 static void cifs_swap_deactivate(struct file *file)
5034 {
5035 	struct cifsFileInfo *cfile = file->private_data;
5036 
5037 	cifs_dbg(FYI, "swap deactivate\n");
5038 
5039 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5040 
5041 	if (cfile)
5042 		cfile->swapfile = false;
5043 
5044 	/* do we need to unpin (or unlock) the file */
5045 }
5046 
5047 const struct address_space_operations cifs_addr_ops = {
5048 	.read_folio = cifs_read_folio,
5049 	.readahead = cifs_readahead,
5050 	.writepages = cifs_writepages,
5051 	.write_begin = cifs_write_begin,
5052 	.write_end = cifs_write_end,
5053 	.dirty_folio = netfs_dirty_folio,
5054 	.release_folio = cifs_release_folio,
5055 	.direct_IO = cifs_direct_io,
5056 	.invalidate_folio = cifs_invalidate_folio,
5057 	.launder_folio = cifs_launder_folio,
5058 	.migrate_folio = filemap_migrate_folio,
5059 	/*
5060 	 * TODO: investigate and if useful we could add an is_dirty_writeback
5061 	 * helper if needed
5062 	 */
5063 	.swap_activate = cifs_swap_activate,
5064 	.swap_deactivate = cifs_swap_deactivate,
5065 };
5066 
5067 /*
5068  * cifs_readahead requires the server to support a buffer large enough to
5069  * contain the header plus one complete page of data.  Otherwise, we need
5070  * to leave cifs_readahead out of the address space operations.
5071  */
5072 const struct address_space_operations cifs_addr_ops_smallbuf = {
5073 	.read_folio = cifs_read_folio,
5074 	.writepages = cifs_writepages,
5075 	.write_begin = cifs_write_begin,
5076 	.write_end = cifs_write_end,
5077 	.dirty_folio = netfs_dirty_folio,
5078 	.release_folio = cifs_release_folio,
5079 	.invalidate_folio = cifs_invalidate_folio,
5080 	.launder_folio = cifs_launder_folio,
5081 	.migrate_folio = filemap_migrate_folio,
5082 };
5083