xref: /linux/fs/smb/client/file.c (revision f6e0a4984c2e7244689ea87b62b433bed9d07e94)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
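		/*
		 * folio_lock() may sleep, so pause the XArray walk and drop
		 * the RCU read lock before taking it, then resume the walk
		 * once the folio has been cleaned and unlocked.
		 */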
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark all open files on tree connections as invalid, since they
167  * were closed when the session to the server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only do this once per reconnect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->need_reconnect)
179 		tcon->status = TID_NEED_RECON;
180 
181 	if (tcon->status != TID_NEED_RECON) {
182 		spin_unlock(&tcon->tc_lock);
183 		return;
184 	}
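	/* claim the invalidation so that concurrent callers bail out above */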
185 	tcon->status = TID_IN_FILES_INVALIDATE;
186 	spin_unlock(&tcon->tc_lock);
187 
188 	/* list all files open on tree connection and mark them invalid */
189 	spin_lock(&tcon->open_file_lock);
190 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 		open_file->invalidHandle = true;
193 		open_file->oplock_break_cancelled = true;
194 	}
195 	spin_unlock(&tcon->open_file_lock);
196 
197 	invalidate_all_cached_dirs(tcon);
198 	spin_lock(&tcon->tc_lock);
199 	if (tcon->status == TID_IN_FILES_INVALIDATE)
200 		tcon->status = TID_NEED_TCON;
201 	spin_unlock(&tcon->tc_lock);
202 
203 	/*
204 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 	 * to this tcon.
206 	 */
207 }
208 
209 static inline int cifs_convert_flags(unsigned int flags)
210 {
211 	if ((flags & O_ACCMODE) == O_RDONLY)
212 		return GENERIC_READ;
213 	else if ((flags & O_ACCMODE) == O_WRONLY)
214 		return GENERIC_WRITE;
215 	else if ((flags & O_ACCMODE) == O_RDWR) {
216 		/* GENERIC_ALL is too much permission to request; it
217 		   can cause unnecessary access-denied errors on create */
218 		/* return GENERIC_ALL; */
219 		return (GENERIC_READ | GENERIC_WRITE);
220 	}
221 
222 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 		FILE_READ_DATA);
225 }
226 
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230 	u32 posix_flags = 0;
231 
232 	if ((flags & O_ACCMODE) == O_RDONLY)
233 		posix_flags = SMB_O_RDONLY;
234 	else if ((flags & O_ACCMODE) == O_WRONLY)
235 		posix_flags = SMB_O_WRONLY;
236 	else if ((flags & O_ACCMODE) == O_RDWR)
237 		posix_flags = SMB_O_RDWR;
238 
239 	if (flags & O_CREAT) {
240 		posix_flags |= SMB_O_CREAT;
241 		if (flags & O_EXCL)
242 			posix_flags |= SMB_O_EXCL;
243 	} else if (flags & O_EXCL)
244 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 			 current->comm, current->tgid);
246 
247 	if (flags & O_TRUNC)
248 		posix_flags |= SMB_O_TRUNC;
249 	/* be safe and imply O_SYNC for O_DSYNC */
250 	if (flags & O_DSYNC)
251 		posix_flags |= SMB_O_SYNC;
252 	if (flags & O_DIRECTORY)
253 		posix_flags |= SMB_O_DIRECTORY;
254 	if (flags & O_NOFOLLOW)
255 		posix_flags |= SMB_O_NOFOLLOW;
256 	if (flags & O_DIRECT)
257 		posix_flags |= SMB_O_DIRECT;
258 
259 	return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262 
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 		return FILE_CREATE;
267 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 		return FILE_OVERWRITE_IF;
269 	else if ((flags & O_CREAT) == O_CREAT)
270 		return FILE_OPEN_IF;
271 	else if ((flags & O_TRUNC) == O_TRUNC)
272 		return FILE_OVERWRITE;
273 	else
274 		return FILE_OPEN;
275 }
276 
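/*
 * For example, a few common open(2) flag combinations map through
 * cifs_convert_flags() and cifs_get_disposition() above as follows:
 *
 *	O_RDONLY                  -> GENERIC_READ,                  FILE_OPEN
 *	O_WRONLY | O_CREAT        -> GENERIC_WRITE,                 FILE_OPEN_IF
 *	O_RDWR | O_CREAT | O_EXCL -> GENERIC_READ | GENERIC_WRITE,  FILE_CREATE
 *	O_RDWR | O_TRUNC          -> GENERIC_READ | GENERIC_WRITE,  FILE_OVERWRITE
 */
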
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279 			struct super_block *sb, int mode, unsigned int f_flags,
280 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282 	int rc;
283 	FILE_UNIX_BASIC_INFO *presp_data;
284 	__u32 posix_flags = 0;
285 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 	struct cifs_fattr fattr;
287 	struct tcon_link *tlink;
288 	struct cifs_tcon *tcon;
289 
290 	cifs_dbg(FYI, "posix open %s\n", full_path);
291 
292 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 	if (presp_data == NULL)
294 		return -ENOMEM;
295 
296 	tlink = cifs_sb_tlink(cifs_sb);
297 	if (IS_ERR(tlink)) {
298 		rc = PTR_ERR(tlink);
299 		goto posix_open_ret;
300 	}
301 
302 	tcon = tlink_tcon(tlink);
303 	mode &= ~current_umask();
304 
305 	posix_flags = cifs_posix_convert_flags(f_flags);
306 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 			     poplock, full_path, cifs_sb->local_nls,
308 			     cifs_remap(cifs_sb));
309 	cifs_put_tlink(tlink);
310 
311 	if (rc)
312 		goto posix_open_ret;
313 
314 	if (presp_data->Type == cpu_to_le32(-1))
315 		goto posix_open_ret; /* open ok, caller does qpathinfo */
316 
317 	if (!pinode)
318 		goto posix_open_ret; /* caller does not need info */
319 
320 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321 
322 	/* get new inode and set it up */
323 	if (*pinode == NULL) {
324 		cifs_fill_uniqueid(sb, &fattr);
325 		*pinode = cifs_iget(sb, &fattr);
326 		if (!*pinode) {
327 			rc = -ENOMEM;
328 			goto posix_open_ret;
329 		}
330 	} else {
331 		cifs_revalidate_mapping(*pinode);
332 		rc = cifs_fattr_to_inode(*pinode, &fattr);
333 	}
334 
335 posix_open_ret:
336 	kfree(presp_data);
337 	return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340 
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345 	int rc;
346 	int desired_access;
347 	int disposition;
348 	int create_options = CREATE_NOT_DIR;
349 	struct TCP_Server_Info *server = tcon->ses->server;
350 	struct cifs_open_parms oparms;
351 
352 	if (!server->ops->open)
353 		return -ENOSYS;
354 
355 	desired_access = cifs_convert_flags(f_flags);
356 
357 /*********************************************************************
358  *  open flag mapping table:
359  *
360  *	POSIX Flag            CIFS Disposition
361  *	----------            ----------------
362  *	O_CREAT               FILE_OPEN_IF
363  *	O_CREAT | O_EXCL      FILE_CREATE
364  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
365  *	O_TRUNC               FILE_OVERWRITE
366  *	none of the above     FILE_OPEN
367  *
368  *	Note that there is no direct match for the disposition
369  *	FILE_SUPERSEDE (ie create whether or not the file exists).
370  *	O_CREAT | O_TRUNC is similar, but it truncates the existing
371  *	file rather than creating a new file as FILE_SUPERSEDE does
372  *	(which uses the attributes / metadata passed in on the open call).
373  *
374  *	O_SYNC is a reasonable match to the CIFS writethrough flag
375  *	and the read/write flags match reasonably.  O_LARGEFILE
376  *	is irrelevant because largefile support is always used
377  *	by this client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY,
378  *	O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
379  *********************************************************************/
380 
381 	disposition = cifs_get_disposition(f_flags);
382 
383 	/* BB pass O_SYNC flag through on file attributes .. BB */
384 
385 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
386 	if (f_flags & O_SYNC)
387 		create_options |= CREATE_WRITE_THROUGH;
388 
389 	if (f_flags & O_DIRECT)
390 		create_options |= CREATE_NO_BUFFER;
391 
392 	oparms = (struct cifs_open_parms) {
393 		.tcon = tcon,
394 		.cifs_sb = cifs_sb,
395 		.desired_access = desired_access,
396 		.create_options = cifs_create_options(cifs_sb, create_options),
397 		.disposition = disposition,
398 		.path = full_path,
399 		.fid = fid,
400 	};
401 
402 	rc = server->ops->open(xid, &oparms, oplock, buf);
403 	if (rc)
404 		return rc;
405 
406 	/* TODO: Add support for calling posix query info, but passing in the fid */
407 	if (tcon->unix_ext)
408 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
409 					      xid);
410 	else
411 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
412 					 xid, fid);
413 
414 	if (rc) {
415 		server->ops->close(xid, tcon, fid);
416 		if (rc == -ESTALE)
417 			rc = -EOPENSTALE;
418 	}
419 
420 	return rc;
421 }
422 
423 static bool
424 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
425 {
426 	struct cifs_fid_locks *cur;
427 	bool has_locks = false;
428 
429 	down_read(&cinode->lock_sem);
430 	list_for_each_entry(cur, &cinode->llist, llist) {
431 		if (!list_empty(&cur->locks)) {
432 			has_locks = true;
433 			break;
434 		}
435 	}
436 	up_read(&cinode->lock_sem);
437 	return has_locks;
438 }
439 
440 void
441 cifs_down_write(struct rw_semaphore *sem)
442 {
443 	while (!down_write_trylock(sem))
444 		msleep(10);
445 }
446 
447 static void cifsFileInfo_put_work(struct work_struct *work);
448 
449 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
450 				       struct tcon_link *tlink, __u32 oplock,
451 				       const char *symlink_target)
452 {
453 	struct dentry *dentry = file_dentry(file);
454 	struct inode *inode = d_inode(dentry);
455 	struct cifsInodeInfo *cinode = CIFS_I(inode);
456 	struct cifsFileInfo *cfile;
457 	struct cifs_fid_locks *fdlocks;
458 	struct cifs_tcon *tcon = tlink_tcon(tlink);
459 	struct TCP_Server_Info *server = tcon->ses->server;
460 
461 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
462 	if (cfile == NULL)
463 		return cfile;
464 
465 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
466 	if (!fdlocks) {
467 		kfree(cfile);
468 		return NULL;
469 	}
470 
471 	if (symlink_target) {
472 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
473 		if (!cfile->symlink_target) {
474 			kfree(fdlocks);
475 			kfree(cfile);
476 			return NULL;
477 		}
478 	}
479 
480 	INIT_LIST_HEAD(&fdlocks->locks);
481 	fdlocks->cfile = cfile;
482 	cfile->llist = fdlocks;
483 
484 	cfile->count = 1;
485 	cfile->pid = current->tgid;
486 	cfile->uid = current_fsuid();
487 	cfile->dentry = dget(dentry);
488 	cfile->f_flags = file->f_flags;
489 	cfile->invalidHandle = false;
490 	cfile->deferred_close_scheduled = false;
491 	cfile->tlink = cifs_get_tlink(tlink);
492 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
493 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
494 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
495 	mutex_init(&cfile->fh_mutex);
496 	spin_lock_init(&cfile->file_info_lock);
497 
498 	cifs_sb_active(inode->i_sb);
499 
500 	/*
501 	 * If the server returned a read oplock and we have mandatory brlocks,
502 	 * set oplock level to None.
503 	 */
504 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
505 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
506 		oplock = 0;
507 	}
508 
509 	cifs_down_write(&cinode->lock_sem);
510 	list_add(&fdlocks->llist, &cinode->llist);
511 	up_write(&cinode->lock_sem);
512 
513 	spin_lock(&tcon->open_file_lock);
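	/*
	 * If a lease break raced with this open, the pending_open entry
	 * recorded the downgraded oplock level; prefer it over the level
	 * the server returned on the open response.
	 */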
514 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
515 		oplock = fid->pending_open->oplock;
516 	list_del(&fid->pending_open->olist);
517 
518 	fid->purge_cache = false;
519 	server->ops->set_fid(cfile, fid, oplock);
520 
521 	list_add(&cfile->tlist, &tcon->openFileList);
522 	atomic_inc(&tcon->num_local_opens);
523 
524 	/* if this is a readable file instance, put it first in the list */
525 	spin_lock(&cinode->open_file_lock);
526 	if (file->f_mode & FMODE_READ)
527 		list_add(&cfile->flist, &cinode->openFileList);
528 	else
529 		list_add_tail(&cfile->flist, &cinode->openFileList);
530 	spin_unlock(&cinode->open_file_lock);
531 	spin_unlock(&tcon->open_file_lock);
532 
533 	if (fid->purge_cache)
534 		cifs_zap_mapping(inode);
535 
536 	file->private_data = cfile;
537 	return cfile;
538 }
539 
540 struct cifsFileInfo *
541 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
542 {
543 	spin_lock(&cifs_file->file_info_lock);
544 	cifsFileInfo_get_locked(cifs_file);
545 	spin_unlock(&cifs_file->file_info_lock);
546 	return cifs_file;
547 }
548 
549 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
550 {
551 	struct inode *inode = d_inode(cifs_file->dentry);
552 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
553 	struct cifsLockInfo *li, *tmp;
554 	struct super_block *sb = inode->i_sb;
555 
556 	/*
557 	 * Delete any outstanding lock records. We'll lose them when the file
558 	 * is closed anyway.
559 	 */
560 	cifs_down_write(&cifsi->lock_sem);
561 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
562 		list_del(&li->llist);
563 		cifs_del_lock_waiters(li);
564 		kfree(li);
565 	}
566 	list_del(&cifs_file->llist->llist);
567 	kfree(cifs_file->llist);
568 	up_write(&cifsi->lock_sem);
569 
570 	cifs_put_tlink(cifs_file->tlink);
571 	dput(cifs_file->dentry);
572 	cifs_sb_deactive(sb);
573 	kfree(cifs_file->symlink_target);
574 	kfree(cifs_file);
575 }
576 
577 static void cifsFileInfo_put_work(struct work_struct *work)
578 {
579 	struct cifsFileInfo *cifs_file = container_of(work,
580 			struct cifsFileInfo, put);
581 
582 	cifsFileInfo_put_final(cifs_file);
583 }
584 
585 /**
586  * cifsFileInfo_put - release a reference to file priv data
587  *
588  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
589  *
590  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
591  */
592 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
593 {
594 	_cifsFileInfo_put(cifs_file, true, true);
595 }
596 
597 /**
598  * _cifsFileInfo_put - release a reference to file priv data
599  *
600  * This may involve closing the filehandle @cifs_file out on the
601  * server. Must be called without holding tcon->open_file_lock,
602  * cinode->open_file_lock and cifs_file->file_info_lock.
603  *
604  * If @wait_oplock_handler is true and we are releasing the last
605  * reference, wait for any running oplock break handler of the file
606  * and cancel any pending one.
607  *
608  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
609  * @wait_oplock_handler: must be false if called from oplock_break_handler
610  * @offload:	if true, offload the final release of @cifs_file to a workqueue
611  *
612  */
613 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
614 		       bool wait_oplock_handler, bool offload)
615 {
616 	struct inode *inode = d_inode(cifs_file->dentry);
617 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
618 	struct TCP_Server_Info *server = tcon->ses->server;
619 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
620 	struct super_block *sb = inode->i_sb;
621 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
622 	struct cifs_fid fid = {};
623 	struct cifs_pending_open open;
624 	bool oplock_break_cancelled;
625 
626 	spin_lock(&tcon->open_file_lock);
627 	spin_lock(&cifsi->open_file_lock);
628 	spin_lock(&cifs_file->file_info_lock);
629 	if (--cifs_file->count > 0) {
630 		spin_unlock(&cifs_file->file_info_lock);
631 		spin_unlock(&cifsi->open_file_lock);
632 		spin_unlock(&tcon->open_file_lock);
633 		return;
634 	}
635 	spin_unlock(&cifs_file->file_info_lock);
636 
637 	if (server->ops->get_lease_key)
638 		server->ops->get_lease_key(inode, &fid);
639 
640 	/* store open in pending opens to make sure we don't miss lease break */
641 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
642 
643 	/* remove it from the lists */
644 	list_del(&cifs_file->flist);
645 	list_del(&cifs_file->tlist);
646 	atomic_dec(&tcon->num_local_opens);
647 
648 	if (list_empty(&cifsi->openFileList)) {
649 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
650 			 d_inode(cifs_file->dentry));
651 		/*
652 		 * In strict cache mode we need to invalidate the mapping on the
653 		 * last close because it may cause an error when we open this
654 		 * file again and get at least a level II oplock.
655 		 */
656 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
657 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
658 		cifs_set_oplock_level(cifsi, 0);
659 	}
660 
661 	spin_unlock(&cifsi->open_file_lock);
662 	spin_unlock(&tcon->open_file_lock);
663 
664 	oplock_break_cancelled = wait_oplock_handler ?
665 		cancel_work_sync(&cifs_file->oplock_break) : false;
666 
667 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
668 		struct TCP_Server_Info *server = tcon->ses->server;
669 		unsigned int xid;
670 
671 		xid = get_xid();
672 		if (server->ops->close_getattr)
673 			server->ops->close_getattr(xid, tcon, cifs_file);
674 		else if (server->ops->close)
675 			server->ops->close(xid, tcon, &cifs_file->fid);
676 		_free_xid(xid);
677 	}
678 
679 	if (oplock_break_cancelled)
680 		cifs_done_oplock_break(cifsi);
681 
682 	cifs_del_pending_open(&open);
683 
684 	if (offload)
685 		queue_work(fileinfo_put_wq, &cifs_file->put);
686 	else
687 		cifsFileInfo_put_final(cifs_file);
688 }
689 
690 int cifs_open(struct inode *inode, struct file *file)
691 
692 {
693 	int rc = -EACCES;
694 	unsigned int xid;
695 	__u32 oplock;
696 	struct cifs_sb_info *cifs_sb;
697 	struct TCP_Server_Info *server;
698 	struct cifs_tcon *tcon;
699 	struct tcon_link *tlink;
700 	struct cifsFileInfo *cfile = NULL;
701 	void *page;
702 	const char *full_path;
703 	bool posix_open_ok = false;
704 	struct cifs_fid fid = {};
705 	struct cifs_pending_open open;
706 	struct cifs_open_info_data data = {};
707 
708 	xid = get_xid();
709 
710 	cifs_sb = CIFS_SB(inode->i_sb);
711 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
712 		free_xid(xid);
713 		return -EIO;
714 	}
715 
716 	tlink = cifs_sb_tlink(cifs_sb);
717 	if (IS_ERR(tlink)) {
718 		free_xid(xid);
719 		return PTR_ERR(tlink);
720 	}
721 	tcon = tlink_tcon(tlink);
722 	server = tcon->ses->server;
723 
724 	page = alloc_dentry_path();
725 	full_path = build_path_from_dentry(file_dentry(file), page);
726 	if (IS_ERR(full_path)) {
727 		rc = PTR_ERR(full_path);
728 		goto out;
729 	}
730 
731 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
732 		 inode, file->f_flags, full_path);
733 
734 	if (file->f_flags & O_DIRECT &&
735 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
736 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
737 			file->f_op = &cifs_file_direct_nobrl_ops;
738 		else
739 			file->f_op = &cifs_file_direct_ops;
740 	}
741 
742 	/* Get the cached handle as SMB2 close is deferred */
743 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
744 	if (rc == 0) {
745 		if (file->f_flags == cfile->f_flags) {
746 			file->private_data = cfile;
747 			spin_lock(&CIFS_I(inode)->deferred_lock);
748 			cifs_del_deferred_close(cfile);
749 			spin_unlock(&CIFS_I(inode)->deferred_lock);
750 			goto use_cache;
751 		} else {
752 			_cifsFileInfo_put(cfile, true, false);
753 		}
754 	}
755 
756 	if (server->oplocks)
757 		oplock = REQ_OPLOCK;
758 	else
759 		oplock = 0;
760 
761 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
762 	if (!tcon->broken_posix_open && tcon->unix_ext &&
763 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
764 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
765 		/* can not refresh inode info since size could be stale */
766 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
767 				cifs_sb->ctx->file_mode /* ignored */,
768 				file->f_flags, &oplock, &fid.netfid, xid);
769 		if (rc == 0) {
770 			cifs_dbg(FYI, "posix open succeeded\n");
771 			posix_open_ok = true;
772 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
773 			if (tcon->ses->serverNOS)
774 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
775 					 tcon->ses->ip_addr,
776 					 tcon->ses->serverNOS);
777 			tcon->broken_posix_open = true;
778 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
779 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
780 			goto out;
781 		/*
782 		 * Else fall through to retry the open the old way on network
783 		 * i/o or DFS errors.
784 		 */
785 	}
786 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
787 
788 	if (server->ops->get_lease_key)
789 		server->ops->get_lease_key(inode, &fid);
790 
791 	cifs_add_pending_open(&fid, tlink, &open);
792 
793 	if (!posix_open_ok) {
794 		if (server->ops->get_lease_key)
795 			server->ops->get_lease_key(inode, &fid);
796 
797 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
798 				  xid, &data);
799 		if (rc) {
800 			cifs_del_pending_open(&open);
801 			goto out;
802 		}
803 	}
804 
805 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
806 	if (cfile == NULL) {
807 		if (server->ops->close)
808 			server->ops->close(xid, tcon, &fid);
809 		cifs_del_pending_open(&open);
810 		rc = -ENOMEM;
811 		goto out;
812 	}
813 
814 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
815 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
816 		/*
817 		 * Time to set the mode, which we could not set earlier due to
818 		 * problems creating new read-only files.
819 		 */
820 		struct cifs_unix_set_info_args args = {
821 			.mode	= inode->i_mode,
822 			.uid	= INVALID_UID, /* no change */
823 			.gid	= INVALID_GID, /* no change */
824 			.ctime	= NO_CHANGE_64,
825 			.atime	= NO_CHANGE_64,
826 			.mtime	= NO_CHANGE_64,
827 			.device	= 0,
828 		};
829 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
830 				       cfile->pid);
831 	}
832 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
833 
834 use_cache:
835 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
836 			   file->f_mode & FMODE_WRITE);
837 	if (file->f_flags & O_DIRECT &&
838 	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
839 	     file->f_flags & O_APPEND))
840 		cifs_invalidate_cache(file_inode(file),
841 				      FSCACHE_INVAL_DIO_WRITE);
842 
843 out:
844 	free_dentry_path(page);
845 	free_xid(xid);
846 	cifs_put_tlink(tlink);
847 	cifs_free_open_info(&data);
848 	return rc;
849 }
850 
851 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
852 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
853 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
854 
855 /*
856  * Try to reacquire byte-range locks that were released when the session
857  * to the server was lost.
858  */
859 static int
860 cifs_relock_file(struct cifsFileInfo *cfile)
861 {
862 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
863 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
864 	int rc = 0;
865 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
866 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
867 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
868 
869 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
870 	if (cinode->can_cache_brlcks) {
871 		/* can cache locks - no need to relock */
872 		up_read(&cinode->lock_sem);
873 		return rc;
874 	}
875 
876 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
877 	if (cap_unix(tcon->ses) &&
878 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
879 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
880 		rc = cifs_push_posix_locks(cfile);
881 	else
882 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
883 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
884 
885 	up_read(&cinode->lock_sem);
886 	return rc;
887 }
888 
889 static int
890 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
891 {
892 	int rc = -EACCES;
893 	unsigned int xid;
894 	__u32 oplock;
895 	struct cifs_sb_info *cifs_sb;
896 	struct cifs_tcon *tcon;
897 	struct TCP_Server_Info *server;
898 	struct cifsInodeInfo *cinode;
899 	struct inode *inode;
900 	void *page;
901 	const char *full_path;
902 	int desired_access;
903 	int disposition = FILE_OPEN;
904 	int create_options = CREATE_NOT_DIR;
905 	struct cifs_open_parms oparms;
906 
907 	xid = get_xid();
908 	mutex_lock(&cfile->fh_mutex);
909 	if (!cfile->invalidHandle) {
910 		mutex_unlock(&cfile->fh_mutex);
911 		free_xid(xid);
912 		return 0;
913 	}
914 
915 	inode = d_inode(cfile->dentry);
916 	cifs_sb = CIFS_SB(inode->i_sb);
917 	tcon = tlink_tcon(cfile->tlink);
918 	server = tcon->ses->server;
919 
920 	/*
921 	 * Can not grab the rename sem here because various ops, including those
922 	 * that already hold the rename sem, can end up causing writepage to get
923 	 * called, and if the server was down that means we end up here, and we
924 	 * can never tell whether the caller already holds the rename_sem.
925 	 */
926 	page = alloc_dentry_path();
927 	full_path = build_path_from_dentry(cfile->dentry, page);
928 	if (IS_ERR(full_path)) {
929 		mutex_unlock(&cfile->fh_mutex);
930 		free_dentry_path(page);
931 		free_xid(xid);
932 		return PTR_ERR(full_path);
933 	}
934 
935 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
936 		 inode, cfile->f_flags, full_path);
937 
938 	if (tcon->ses->server->oplocks)
939 		oplock = REQ_OPLOCK;
940 	else
941 		oplock = 0;
942 
943 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
944 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
945 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
946 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
947 		/*
948 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
949 		 * original open. Must mask them off for a reopen.
950 		 */
951 		unsigned int oflags = cfile->f_flags &
952 						~(O_CREAT | O_EXCL | O_TRUNC);
953 
954 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
955 				     cifs_sb->ctx->file_mode /* ignored */,
956 				     oflags, &oplock, &cfile->fid.netfid, xid);
957 		if (rc == 0) {
958 			cifs_dbg(FYI, "posix reopen succeeded\n");
959 			oparms.reconnect = true;
960 			goto reopen_success;
961 		}
962 		/*
963 		 * fall through to retry the open the old way on errors;
964 		 * especially in the reconnect path it is important to retry hard
965 		 */
966 	}
967 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
968 
969 	desired_access = cifs_convert_flags(cfile->f_flags);
970 
971 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
972 	if (cfile->f_flags & O_SYNC)
973 		create_options |= CREATE_WRITE_THROUGH;
974 
975 	if (cfile->f_flags & O_DIRECT)
976 		create_options |= CREATE_NO_BUFFER;
977 
978 	if (server->ops->get_lease_key)
979 		server->ops->get_lease_key(inode, &cfile->fid);
980 
981 	oparms = (struct cifs_open_parms) {
982 		.tcon = tcon,
983 		.cifs_sb = cifs_sb,
984 		.desired_access = desired_access,
985 		.create_options = cifs_create_options(cifs_sb, create_options),
986 		.disposition = disposition,
987 		.path = full_path,
988 		.fid = &cfile->fid,
989 		.reconnect = true,
990 	};
991 
992 	/*
993 	 * Can not refresh the inode by passing in a file_info buf to be returned
994 	 * by ops->open and then calling get_inode_info with the returned buf,
995 	 * since the file might have write-behind data that needs to be flushed
996 	 * and the server's version of the file size can be stale. If we knew for
997 	 * sure that the inode was not dirty locally we could do this.
998 	 */
999 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1000 	if (rc == -ENOENT && oparms.reconnect == false) {
1001 		/* durable handle timeout has expired - open the file again */
1002 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1003 		/* indicate that we need to relock the file */
1004 		oparms.reconnect = true;
1005 	}
1006 
1007 	if (rc) {
1008 		mutex_unlock(&cfile->fh_mutex);
1009 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1010 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1011 		goto reopen_error_exit;
1012 	}
1013 
1014 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1015 reopen_success:
1016 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1017 	cfile->invalidHandle = false;
1018 	mutex_unlock(&cfile->fh_mutex);
1019 	cinode = CIFS_I(inode);
1020 
1021 	if (can_flush) {
1022 		rc = filemap_write_and_wait(inode->i_mapping);
1023 		if (!is_interrupt_error(rc))
1024 			mapping_set_error(inode->i_mapping, rc);
1025 
1026 		if (tcon->posix_extensions) {
1027 			rc = smb311_posix_get_inode_info(&inode, full_path,
1028 							 NULL, inode->i_sb, xid);
1029 		} else if (tcon->unix_ext) {
1030 			rc = cifs_get_inode_info_unix(&inode, full_path,
1031 						      inode->i_sb, xid);
1032 		} else {
1033 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1034 						 inode->i_sb, xid, NULL);
1035 		}
1036 	}
1037 	/*
1038 	 * Else we are already writing out data to the server and could deadlock
1039 	 * if we tried to flush data; and since we do not know whether we have
1040 	 * data that would invalidate the current end of file on the server, we
1041 	 * can not go to the server to get the new inode info.
1042 	 */
1043 
1044 	/*
1045 	 * If the server returned a read oplock and we have mandatory brlocks,
1046 	 * set oplock level to None.
1047 	 */
1048 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1049 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1050 		oplock = 0;
1051 	}
1052 
1053 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1054 	if (oparms.reconnect)
1055 		cifs_relock_file(cfile);
1056 
1057 reopen_error_exit:
1058 	free_dentry_path(page);
1059 	free_xid(xid);
1060 	return rc;
1061 }
1062 
1063 void smb2_deferred_work_close(struct work_struct *work)
1064 {
1065 	struct cifsFileInfo *cfile = container_of(work,
1066 			struct cifsFileInfo, deferred.work);
1067 
1068 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1069 	cifs_del_deferred_close(cfile);
1070 	cfile->deferred_close_scheduled = false;
1071 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1072 	_cifsFileInfo_put(cfile, true, false);
1073 }
1074 
1075 int cifs_close(struct inode *inode, struct file *file)
1076 {
1077 	struct cifsFileInfo *cfile;
1078 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1079 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1080 	struct cifs_deferred_close *dclose;
1081 
1082 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1083 
1084 	if (file->private_data != NULL) {
1085 		cfile = file->private_data;
1086 		file->private_data = NULL;
1087 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
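		/*
		 * If we hold a read/handle/write caching lease, defer the
		 * actual SMB close for the configured closetimeo so that a
		 * quick reopen of the same file can reuse this handle (see
		 * cifs_get_readable_path() in cifs_open()).
		 */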
1088 		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
1089 		    && cinode->lease_granted &&
1090 		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
1091 		    dclose) {
1092 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1093 				inode_set_mtime_to_ts(inode,
1094 						      inode_set_ctime_current(inode));
1095 			}
1096 			spin_lock(&cinode->deferred_lock);
1097 			cifs_add_deferred_close(cfile, dclose);
1098 			if (cfile->deferred_close_scheduled &&
1099 			    delayed_work_pending(&cfile->deferred)) {
1100 				/*
1101 				 * If there is no pending work, mod_delayed_work queues new work.
1102 				 * So, increase the ref count to avoid a use-after-free.
1103 				 */
1104 				if (!mod_delayed_work(deferredclose_wq,
1105 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1106 					cifsFileInfo_get(cfile);
1107 			} else {
1108 				/* Deferred close for files */
1109 				queue_delayed_work(deferredclose_wq,
1110 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1111 				cfile->deferred_close_scheduled = true;
1112 				spin_unlock(&cinode->deferred_lock);
1113 				return 0;
1114 			}
1115 			spin_unlock(&cinode->deferred_lock);
1116 			_cifsFileInfo_put(cfile, true, false);
1117 		} else {
1118 			_cifsFileInfo_put(cfile, true, false);
1119 			kfree(dclose);
1120 		}
1121 	}
1122 
1123 	/* return code from the ->release op is always ignored */
1124 	return 0;
1125 }
1126 
1127 void
1128 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1129 {
1130 	struct cifsFileInfo *open_file, *tmp;
1131 	struct list_head tmp_list;
1132 
1133 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1134 		return;
1135 
1136 	tcon->need_reopen_files = false;
1137 
1138 	cifs_dbg(FYI, "Reopen persistent handles\n");
1139 	INIT_LIST_HEAD(&tmp_list);
1140 
1141 	/* list all files open on tree connection, reopen persistent handles */
1142 	spin_lock(&tcon->open_file_lock);
1143 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1144 		if (!open_file->invalidHandle)
1145 			continue;
1146 		cifsFileInfo_get(open_file);
1147 		list_add_tail(&open_file->rlist, &tmp_list);
1148 	}
1149 	spin_unlock(&tcon->open_file_lock);
1150 
1151 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1152 		if (cifs_reopen_file(open_file, false /* do not flush */))
1153 			tcon->need_reopen_files = true;
1154 		list_del_init(&open_file->rlist);
1155 		cifsFileInfo_put(open_file);
1156 	}
1157 }
1158 
1159 int cifs_closedir(struct inode *inode, struct file *file)
1160 {
1161 	int rc = 0;
1162 	unsigned int xid;
1163 	struct cifsFileInfo *cfile = file->private_data;
1164 	struct cifs_tcon *tcon;
1165 	struct TCP_Server_Info *server;
1166 	char *buf;
1167 
1168 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1169 
1170 	if (cfile == NULL)
1171 		return rc;
1172 
1173 	xid = get_xid();
1174 	tcon = tlink_tcon(cfile->tlink);
1175 	server = tcon->ses->server;
1176 
1177 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1178 	spin_lock(&cfile->file_info_lock);
1179 	if (server->ops->dir_needs_close(cfile)) {
1180 		cfile->invalidHandle = true;
1181 		spin_unlock(&cfile->file_info_lock);
1182 		if (server->ops->close_dir)
1183 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1184 		else
1185 			rc = -ENOSYS;
1186 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1187 		/* not much we can do if it fails anyway, ignore rc */
1188 		rc = 0;
1189 	} else
1190 		spin_unlock(&cfile->file_info_lock);
1191 
1192 	buf = cfile->srch_inf.ntwrk_buf_start;
1193 	if (buf) {
1194 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1195 		cfile->srch_inf.ntwrk_buf_start = NULL;
1196 		if (cfile->srch_inf.smallBuf)
1197 			cifs_small_buf_release(buf);
1198 		else
1199 			cifs_buf_release(buf);
1200 	}
1201 
1202 	cifs_put_tlink(cfile->tlink);
1203 	kfree(file->private_data);
1204 	file->private_data = NULL;
1205 	/* BB can we lock the filestruct while this is going on? */
1206 	free_xid(xid);
1207 	return rc;
1208 }
1209 
1210 static struct cifsLockInfo *
1211 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1212 {
1213 	struct cifsLockInfo *lock =
1214 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1215 	if (!lock)
1216 		return lock;
1217 	lock->offset = offset;
1218 	lock->length = length;
1219 	lock->type = type;
1220 	lock->pid = current->tgid;
1221 	lock->flags = flags;
1222 	INIT_LIST_HEAD(&lock->blist);
1223 	init_waitqueue_head(&lock->block_q);
1224 	return lock;
1225 }
1226 
1227 void
1228 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1229 {
1230 	struct cifsLockInfo *li, *tmp;
1231 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1232 		list_del_init(&li->blist);
1233 		wake_up(&li->block_q);
1234 	}
1235 }
1236 
1237 #define CIFS_LOCK_OP	0
1238 #define CIFS_READ_OP	1
1239 #define CIFS_WRITE_OP	2
1240 
1241 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1242 static bool
1243 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1244 			    __u64 length, __u8 type, __u16 flags,
1245 			    struct cifsFileInfo *cfile,
1246 			    struct cifsLockInfo **conf_lock, int rw_check)
1247 {
1248 	struct cifsLockInfo *li;
1249 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1250 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1251 
1252 	list_for_each_entry(li, &fdlocks->locks, llist) {
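		/* ranges that do not overlap the requested one cannot conflict */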
1253 		if (offset + length <= li->offset ||
1254 		    offset >= li->offset + li->length)
1255 			continue;
1256 		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1257 		    server->ops->compare_fids(cfile, cur_cfile)) {
1258 			/* shared lock prevents write op through the same fid */
1259 			if (!(li->type & server->vals->shared_lock_type) ||
1260 			    rw_check != CIFS_WRITE_OP)
1261 				continue;
1262 		}
1263 		if ((type & server->vals->shared_lock_type) &&
1264 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1265 		     current->tgid == li->pid) || type == li->type))
1266 			continue;
1267 		if (rw_check == CIFS_LOCK_OP &&
1268 		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1269 		    server->ops->compare_fids(cfile, cur_cfile))
1270 			continue;
1271 		if (conf_lock)
1272 			*conf_lock = li;
1273 		return true;
1274 	}
1275 	return false;
1276 }
1277 
1278 bool
1279 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1280 			__u8 type, __u16 flags,
1281 			struct cifsLockInfo **conf_lock, int rw_check)
1282 {
1283 	bool rc = false;
1284 	struct cifs_fid_locks *cur;
1285 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1286 
1287 	list_for_each_entry(cur, &cinode->llist, llist) {
1288 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1289 						 flags, cfile, conf_lock,
1290 						 rw_check);
1291 		if (rc)
1292 			break;
1293 	}
1294 
1295 	return rc;
1296 }
1297 
1298 /*
1299  * Check if there is another lock that prevents us from setting the lock
1300  * (mandatory style). If such a lock exists, update the flock structure with
1301  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1302  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1303  * send a request to the server, or 1 otherwise.
1304  */
1305 static int
1306 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1307 	       __u8 type, struct file_lock *flock)
1308 {
1309 	int rc = 0;
1310 	struct cifsLockInfo *conf_lock;
1311 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1312 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1313 	bool exist;
1314 
1315 	down_read(&cinode->lock_sem);
1316 
1317 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1318 					flock->c.flc_flags, &conf_lock,
1319 					CIFS_LOCK_OP);
1320 	if (exist) {
1321 		flock->fl_start = conf_lock->offset;
1322 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1323 		flock->c.flc_pid = conf_lock->pid;
1324 		if (conf_lock->type & server->vals->shared_lock_type)
1325 			flock->c.flc_type = F_RDLCK;
1326 		else
1327 			flock->c.flc_type = F_WRLCK;
1328 	} else if (!cinode->can_cache_brlcks)
1329 		rc = 1;
1330 	else
1331 		flock->c.flc_type = F_UNLCK;
1332 
1333 	up_read(&cinode->lock_sem);
1334 	return rc;
1335 }
1336 
1337 static void
1338 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1339 {
1340 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1341 	cifs_down_write(&cinode->lock_sem);
1342 	list_add_tail(&lock->llist, &cfile->llist->locks);
1343 	up_write(&cinode->lock_sem);
1344 }
1345 
1346 /*
1347  * Set the byte-range lock (mandatory style). Returns:
1348  * 1) 0, if we set the lock and don't need to send a request to the server;
1349  * 2) 1, if no locks prevent us but we need to send a request to the server;
1350  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1351  */
1352 static int
1353 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1354 		 bool wait)
1355 {
1356 	struct cifsLockInfo *conf_lock;
1357 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1358 	bool exist;
1359 	int rc = 0;
1360 
1361 try_again:
1362 	exist = false;
1363 	cifs_down_write(&cinode->lock_sem);
1364 
1365 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1366 					lock->type, lock->flags, &conf_lock,
1367 					CIFS_LOCK_OP);
1368 	if (!exist && cinode->can_cache_brlcks) {
1369 		list_add_tail(&lock->llist, &cfile->llist->locks);
1370 		up_write(&cinode->lock_sem);
1371 		return rc;
1372 	}
1373 
1374 	if (!exist)
1375 		rc = 1;
1376 	else if (!wait)
1377 		rc = -EACCES;
1378 	else {
1379 		list_add_tail(&lock->blist, &conf_lock->blist);
1380 		up_write(&cinode->lock_sem);
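		/*
		 * Wait until cifs_del_lock_waiters() has unlinked us from the
		 * conflicting lock's blocked list (i.e. that lock went away),
		 * then retry the conflict check from scratch.
		 */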
1381 		rc = wait_event_interruptible(lock->block_q,
1382 					(lock->blist.prev == &lock->blist) &&
1383 					(lock->blist.next == &lock->blist));
1384 		if (!rc)
1385 			goto try_again;
1386 		cifs_down_write(&cinode->lock_sem);
1387 		list_del_init(&lock->blist);
1388 	}
1389 
1390 	up_write(&cinode->lock_sem);
1391 	return rc;
1392 }
1393 
1394 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1395 /*
1396  * Check if there is another lock that prevents us from setting the lock
1397  * (posix style). If such a lock exists, update the flock structure with
1398  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1399  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1400  * send a request to the server, or 1 otherwise.
1401  */
1402 static int
1403 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1404 {
1405 	int rc = 0;
1406 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1407 	unsigned char saved_type = flock->c.flc_type;
1408 
1409 	if ((flock->c.flc_flags & FL_POSIX) == 0)
1410 		return 1;
1411 
1412 	down_read(&cinode->lock_sem);
1413 	posix_test_lock(file, flock);
1414 
1415 	if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) {
1416 		flock->c.flc_type = saved_type;
1417 		rc = 1;
1418 	}
1419 
1420 	up_read(&cinode->lock_sem);
1421 	return rc;
1422 }
1423 
1424 /*
1425  * Set the byte-range lock (posix style). Returns:
1426  * 1) <0, if an error occurs while setting the lock;
1427  * 2) 0, if we set the lock and don't need to send a request to the server;
1428  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1429  * 4) FILE_LOCK_DEFERRED + 1, if we need to send a request to the server.
1430  */
1431 static int
1432 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1433 {
1434 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1435 	int rc = FILE_LOCK_DEFERRED + 1;
1436 
1437 	if ((flock->c.flc_flags & FL_POSIX) == 0)
1438 		return rc;
1439 
1440 	cifs_down_write(&cinode->lock_sem);
1441 	if (!cinode->can_cache_brlcks) {
1442 		up_write(&cinode->lock_sem);
1443 		return rc;
1444 	}
1445 
1446 	rc = posix_lock_file(file, flock, NULL);
1447 	up_write(&cinode->lock_sem);
1448 	return rc;
1449 }
1450 
1451 int
1452 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1453 {
1454 	unsigned int xid;
1455 	int rc = 0, stored_rc;
1456 	struct cifsLockInfo *li, *tmp;
1457 	struct cifs_tcon *tcon;
1458 	unsigned int num, max_num, max_buf;
1459 	LOCKING_ANDX_RANGE *buf, *cur;
1460 	static const int types[] = {
1461 		LOCKING_ANDX_LARGE_FILES,
1462 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1463 	};
1464 	int i;
1465 
1466 	xid = get_xid();
1467 	tcon = tlink_tcon(cfile->tlink);
1468 
1469 	/*
1470 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1471 	 * and check it before using.
1472 	 */
1473 	max_buf = tcon->ses->server->maxBuf;
1474 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1475 		free_xid(xid);
1476 		return -EINVAL;
1477 	}
1478 
1479 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1480 		     PAGE_SIZE);
1481 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1482 			PAGE_SIZE);
1483 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1484 						sizeof(LOCKING_ANDX_RANGE);
1485 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1486 	if (!buf) {
1487 		free_xid(xid);
1488 		return -ENOMEM;
1489 	}
1490 
1491 	for (i = 0; i < 2; i++) {
1492 		cur = buf;
1493 		num = 0;
1494 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1495 			if (li->type != types[i])
1496 				continue;
1497 			cur->Pid = cpu_to_le16(li->pid);
1498 			cur->LengthLow = cpu_to_le32((u32)li->length);
1499 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1500 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1501 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1502 			if (++num == max_num) {
1503 				stored_rc = cifs_lockv(xid, tcon,
1504 						       cfile->fid.netfid,
1505 						       (__u8)li->type, 0, num,
1506 						       buf);
1507 				if (stored_rc)
1508 					rc = stored_rc;
1509 				cur = buf;
1510 				num = 0;
1511 			} else
1512 				cur++;
1513 		}
1514 
1515 		if (num) {
1516 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1517 					       (__u8)types[i], 0, num, buf);
1518 			if (stored_rc)
1519 				rc = stored_rc;
1520 		}
1521 	}
1522 
1523 	kfree(buf);
1524 	free_xid(xid);
1525 	return rc;
1526 }
1527 
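/*
 * Derive the 32-bit lock "owner" value sent to the server for POSIX locks
 * from the VFS lock owner; mixing in the random cifs_lock_secret is used
 * so that raw kernel pointer values are not sent on the wire.
 */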
1528 static __u32
1529 hash_lockowner(fl_owner_t owner)
1530 {
1531 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1532 }
1533 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1534 
1535 struct lock_to_push {
1536 	struct list_head llist;
1537 	__u64 offset;
1538 	__u64 length;
1539 	__u32 pid;
1540 	__u16 netfid;
1541 	__u8 type;
1542 };
1543 
1544 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1545 static int
1546 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1547 {
1548 	struct inode *inode = d_inode(cfile->dentry);
1549 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1550 	struct file_lock *flock;
1551 	struct file_lock_context *flctx = locks_inode_context(inode);
1552 	unsigned int count = 0, i;
1553 	int rc = 0, xid, type;
1554 	struct list_head locks_to_send, *el;
1555 	struct lock_to_push *lck, *tmp;
1556 	__u64 length;
1557 
1558 	xid = get_xid();
1559 
1560 	if (!flctx)
1561 		goto out;
1562 
1563 	spin_lock(&flctx->flc_lock);
1564 	list_for_each(el, &flctx->flc_posix) {
1565 		count++;
1566 	}
1567 	spin_unlock(&flctx->flc_lock);
1568 
1569 	INIT_LIST_HEAD(&locks_to_send);
1570 
1571 	/*
1572 	 * Allocating count locks is enough because no FL_POSIX locks can be
1573 	 * added to the list while we are holding cinode->lock_sem, which
1574 	 * protects locking operations on this inode.
1575 	 */
1576 	for (i = 0; i < count; i++) {
1577 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1578 		if (!lck) {
1579 			rc = -ENOMEM;
1580 			goto err_out;
1581 		}
1582 		list_add_tail(&lck->llist, &locks_to_send);
1583 	}
1584 
1585 	el = locks_to_send.next;
1586 	spin_lock(&flctx->flc_lock);
1587 	for_each_file_lock(flock, &flctx->flc_posix) {
1588 		unsigned char ftype = flock->c.flc_type;
1589 
1590 		if (el == &locks_to_send) {
1591 			/*
1592 			 * The list ended. We don't have enough allocated
1593 			 * structures - something is really wrong.
1594 			 */
1595 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1596 			break;
1597 		}
1598 		length = cifs_flock_len(flock);
1599 		if (ftype == F_RDLCK || ftype == F_SHLCK)
1600 			type = CIFS_RDLCK;
1601 		else
1602 			type = CIFS_WRLCK;
1603 		lck = list_entry(el, struct lock_to_push, llist);
1604 		lck->pid = hash_lockowner(flock->c.flc_owner);
1605 		lck->netfid = cfile->fid.netfid;
1606 		lck->length = length;
1607 		lck->type = type;
1608 		lck->offset = flock->fl_start;
1609 	}
1610 	spin_unlock(&flctx->flc_lock);
1611 
1612 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1613 		int stored_rc;
1614 
1615 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1616 					     lck->offset, lck->length, NULL,
1617 					     lck->type, 0);
1618 		if (stored_rc)
1619 			rc = stored_rc;
1620 		list_del(&lck->llist);
1621 		kfree(lck);
1622 	}
1623 
1624 out:
1625 	free_xid(xid);
1626 	return rc;
1627 err_out:
1628 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1629 		list_del(&lck->llist);
1630 		kfree(lck);
1631 	}
1632 	goto out;
1633 }
1634 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1635 
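/*
 * Push any locally cached byte-range locks out to the server and stop
 * caching new ones.  Called once we can no longer rely on caching locks
 * locally (e.g. when an oplock/lease that allowed it is being dropped).
 */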
1636 static int
1637 cifs_push_locks(struct cifsFileInfo *cfile)
1638 {
1639 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1640 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1641 	int rc = 0;
1642 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1643 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1644 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1645 
1646 	/* we are going to update can_cache_brlcks here - need write access */
1647 	cifs_down_write(&cinode->lock_sem);
1648 	if (!cinode->can_cache_brlcks) {
1649 		up_write(&cinode->lock_sem);
1650 		return rc;
1651 	}
1652 
1653 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1654 	if (cap_unix(tcon->ses) &&
1655 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1656 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1657 		rc = cifs_push_posix_locks(cfile);
1658 	else
1659 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1660 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1661 
1662 	cinode->can_cache_brlcks = false;
1663 	up_write(&cinode->lock_sem);
1664 	return rc;
1665 }
1666 
1667 static void
1668 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1669 		bool *wait_flag, struct TCP_Server_Info *server)
1670 {
1671 	if (flock->c.flc_flags & FL_POSIX)
1672 		cifs_dbg(FYI, "Posix\n");
1673 	if (flock->c.flc_flags & FL_FLOCK)
1674 		cifs_dbg(FYI, "Flock\n");
1675 	if (flock->c.flc_flags & FL_SLEEP) {
1676 		cifs_dbg(FYI, "Blocking lock\n");
1677 		*wait_flag = true;
1678 	}
1679 	if (flock->c.flc_flags & FL_ACCESS)
1680 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1681 	if (flock->c.flc_flags & FL_LEASE)
1682 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1683 	if (flock->c.flc_flags &
1684 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1685 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1686 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n",
1687 		         flock->c.flc_flags);
1688 
1689 	*type = server->vals->large_lock_type;
1690 	if (lock_is_write(flock)) {
1691 		cifs_dbg(FYI, "F_WRLCK\n");
1692 		*type |= server->vals->exclusive_lock_type;
1693 		*lock = 1;
1694 	} else if (lock_is_unlock(flock)) {
1695 		cifs_dbg(FYI, "F_UNLCK\n");
1696 		*type |= server->vals->unlock_lock_type;
1697 		*unlock = 1;
1698 		/* Check if unlock includes more than one lock range */
1699 	} else if (lock_is_read(flock)) {
1700 		cifs_dbg(FYI, "F_RDLCK\n");
1701 		*type |= server->vals->shared_lock_type;
1702 		*lock = 1;
1703 	} else if (flock->c.flc_type == F_EXLCK) {
1704 		cifs_dbg(FYI, "F_EXLCK\n");
1705 		*type |= server->vals->exclusive_lock_type;
1706 		*lock = 1;
1707 	} else if (flock->c.flc_type == F_SHLCK) {
1708 		cifs_dbg(FYI, "F_SHLCK\n");
1709 		*type |= server->vals->shared_lock_type;
1710 		*lock = 1;
1711 	} else
1712 		cifs_dbg(FYI, "Unknown type of lock\n");
1713 }
1714 
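/*
 * Handle a lock query (F_GETLK style).  For mandatory (non-POSIX) locks
 * that cannot be resolved from the locally cached lock state, probe the
 * server: try to take the requested byte-range lock and, if that succeeds,
 * unlock it again right away; failing to acquire the probe lock means a
 * conflicting lock exists.
 */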
1715 static int
1716 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1717 	   bool wait_flag, bool posix_lck, unsigned int xid)
1718 {
1719 	int rc = 0;
1720 	__u64 length = cifs_flock_len(flock);
1721 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1722 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1723 	struct TCP_Server_Info *server = tcon->ses->server;
1724 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1725 	__u16 netfid = cfile->fid.netfid;
1726 
1727 	if (posix_lck) {
1728 		int posix_lock_type;
1729 
1730 		rc = cifs_posix_lock_test(file, flock);
1731 		if (!rc)
1732 			return rc;
1733 
1734 		if (type & server->vals->shared_lock_type)
1735 			posix_lock_type = CIFS_RDLCK;
1736 		else
1737 			posix_lock_type = CIFS_WRLCK;
1738 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1739 				      hash_lockowner(flock->c.flc_owner),
1740 				      flock->fl_start, length, flock,
1741 				      posix_lock_type, wait_flag);
1742 		return rc;
1743 	}
1744 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1745 
1746 	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1747 	if (!rc)
1748 		return rc;
1749 
1750 	/* BB we could chain these into one lock request BB */
1751 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1752 				    1, 0, false);
1753 	if (rc == 0) {
1754 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1755 					    type, 0, 1, false);
1756 		flock->c.flc_type = F_UNLCK;
1757 		if (rc != 0)
1758 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1759 				 rc);
1760 		return 0;
1761 	}
1762 
1763 	if (type & server->vals->shared_lock_type) {
1764 		flock->c.flc_type = F_WRLCK;
1765 		return 0;
1766 	}
1767 
1768 	type &= ~server->vals->exclusive_lock_type;
1769 
1770 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1771 				    type | server->vals->shared_lock_type,
1772 				    1, 0, false);
1773 	if (rc == 0) {
1774 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1775 			type | server->vals->shared_lock_type, 0, 1, false);
1776 		flock->c.flc_type = F_RDLCK;
1777 		if (rc != 0)
1778 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1779 				 rc);
1780 	} else
1781 		flock->c.flc_type = F_WRLCK;
1782 
1783 	return 0;
1784 }
1785 
1786 void
1787 cifs_move_llist(struct list_head *source, struct list_head *dest)
1788 {
1789 	struct list_head *li, *tmp;
1790 	list_for_each_safe(li, tmp, source)
1791 		list_move(li, dest);
1792 }
1793 
1794 void
1795 cifs_free_llist(struct list_head *llist)
1796 {
1797 	struct cifsLockInfo *li, *tmp;
1798 	list_for_each_entry_safe(li, tmp, llist, llist) {
1799 		cifs_del_lock_waiters(li);
1800 		list_del(&li->llist);
1801 		kfree(li);
1802 	}
1803 }
1804 
1805 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1806 int
1807 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1808 		  unsigned int xid)
1809 {
1810 	int rc = 0, stored_rc;
1811 	static const int types[] = {
1812 		LOCKING_ANDX_LARGE_FILES,
1813 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1814 	};
1815 	unsigned int i;
1816 	unsigned int max_num, num, max_buf;
1817 	LOCKING_ANDX_RANGE *buf, *cur;
1818 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1819 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1820 	struct cifsLockInfo *li, *tmp;
1821 	__u64 length = cifs_flock_len(flock);
1822 	struct list_head tmp_llist;
1823 
1824 	INIT_LIST_HEAD(&tmp_llist);
1825 
1826 	/*
1827 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1828 	 * and check it before using.
1829 	 */
1830 	max_buf = tcon->ses->server->maxBuf;
1831 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1832 		return -EINVAL;
1833 
1834 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1835 		     PAGE_SIZE);
1836 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1837 			PAGE_SIZE);
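	/*
	 * max_num is how many LOCKING_ANDX_RANGE entries fit in a single
	 * request buffer after reserving room for the SMB header, so the
	 * unlock ranges below can be sent in batches of at most that many.
	 */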
1838 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1839 						sizeof(LOCKING_ANDX_RANGE);
1840 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1841 	if (!buf)
1842 		return -ENOMEM;
1843 
1844 	cifs_down_write(&cinode->lock_sem);
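	/*
	 * Two passes over the lock list, one per entry in types[]:
	 * exclusive large-file locks first, then shared ones.
	 */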
1845 	for (i = 0; i < 2; i++) {
1846 		cur = buf;
1847 		num = 0;
1848 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1849 			if (flock->fl_start > li->offset ||
1850 			    (flock->fl_start + length) <
1851 			    (li->offset + li->length))
1852 				continue;
1853 			if (current->tgid != li->pid)
1854 				continue;
1855 			if (types[i] != li->type)
1856 				continue;
1857 			if (cinode->can_cache_brlcks) {
1858 				/*
1859 				 * We can cache brlock requests - simply remove
1860 				 * a lock from the file's list.
1861 				 */
1862 				list_del(&li->llist);
1863 				cifs_del_lock_waiters(li);
1864 				kfree(li);
1865 				continue;
1866 			}
1867 			cur->Pid = cpu_to_le16(li->pid);
1868 			cur->LengthLow = cpu_to_le32((u32)li->length);
1869 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1870 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1871 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1872 			/*
1873 			 * We need to save a lock here to let us add it again to
1874 			 * the file's list if the unlock range request fails on
1875 			 * the server.
1876 			 */
1877 			list_move(&li->llist, &tmp_llist);
1878 			if (++num == max_num) {
1879 				stored_rc = cifs_lockv(xid, tcon,
1880 						       cfile->fid.netfid,
1881 						       li->type, num, 0, buf);
1882 				if (stored_rc) {
1883 					/*
1884 					 * We failed on the unlock range
1885 					 * request - add all locks from the tmp
1886 					 * list to the head of the file's list.
1887 					 */
1888 					cifs_move_llist(&tmp_llist,
1889 							&cfile->llist->locks);
1890 					rc = stored_rc;
1891 				} else
1892 					/*
1893 					 * The unlock range request succeed -
1894 					 * The unlock range request succeeded -
1895 					 */
1896 					cifs_free_llist(&tmp_llist);
1897 				cur = buf;
1898 				num = 0;
1899 			} else
1900 				cur++;
1901 		}
1902 		if (num) {
1903 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1904 					       types[i], num, 0, buf);
1905 			if (stored_rc) {
1906 				cifs_move_llist(&tmp_llist,
1907 						&cfile->llist->locks);
1908 				rc = stored_rc;
1909 			} else
1910 				cifs_free_llist(&tmp_llist);
1911 		}
1912 	}
1913 
1914 	up_write(&cinode->lock_sem);
1915 	kfree(buf);
1916 	return rc;
1917 }
1918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1919 
1920 static int
1921 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1922 	   bool wait_flag, bool posix_lck, int lock, int unlock,
1923 	   unsigned int xid)
1924 {
1925 	int rc = 0;
1926 	__u64 length = cifs_flock_len(flock);
1927 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1928 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1929 	struct TCP_Server_Info *server = tcon->ses->server;
1930 	struct inode *inode = d_inode(cfile->dentry);
1931 
1932 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1933 	if (posix_lck) {
1934 		int posix_lock_type;
1935 
1936 		rc = cifs_posix_lock_set(file, flock);
1937 		if (rc <= FILE_LOCK_DEFERRED)
1938 			return rc;
1939 
1940 		if (type & server->vals->shared_lock_type)
1941 			posix_lock_type = CIFS_RDLCK;
1942 		else
1943 			posix_lock_type = CIFS_WRLCK;
1944 
1945 		if (unlock == 1)
1946 			posix_lock_type = CIFS_UNLCK;
1947 
1948 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1949 				      hash_lockowner(flock->c.flc_owner),
1950 				      flock->fl_start, length,
1951 				      NULL, posix_lock_type, wait_flag);
1952 		goto out;
1953 	}
1954 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1955 	if (lock) {
1956 		struct cifsLockInfo *lock;
1957 
1958 		lock = cifs_lock_init(flock->fl_start, length, type,
1959 				      flock->c.flc_flags);
1960 		if (!lock)
1961 			return -ENOMEM;
1962 
1963 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
1964 		if (rc < 0) {
1965 			kfree(lock);
1966 			return rc;
1967 		}
1968 		if (!rc)
1969 			goto out;
1970 
1971 		/*
1972 		 * Windows 7 server can delay breaking lease from read to None
1973 		 * if we set a byte-range lock on a file - break it explicitly
1974 		 * before sending the lock to the server to be sure the next
1975 		 * read won't conflict with non-overlapping locks due to
1976 		 * page reading.
1977 		 */
1978 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1979 					CIFS_CACHE_READ(CIFS_I(inode))) {
1980 			cifs_zap_mapping(inode);
1981 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1982 				 inode);
1983 			CIFS_I(inode)->oplock = 0;
1984 		}
1985 
1986 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1987 					    type, 1, 0, wait_flag);
1988 		if (rc) {
1989 			kfree(lock);
1990 			return rc;
1991 		}
1992 
1993 		cifs_lock_add(cfile, lock);
1994 	} else if (unlock)
1995 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
1996 
1997 out:
1998 	if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) {
1999 		/*
2000 		 * If this is a request to remove all locks because we
2001 		 * are closing the file, it doesn't matter if the
2002 		 * unlocking failed as both cifs.ko and the SMB server
2003 		 * remove the lock on file close
2004 		 */
2005 		if (rc) {
2006 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2007 			if (!(flock->c.flc_flags & FL_CLOSE))
2008 				return rc;
2009 		}
2010 		rc = locks_lock_file_wait(file, flock);
2011 	}
2012 	return rc;
2013 }
2014 
2015 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2016 {
2017 	int rc, xid;
2018 	int lock = 0, unlock = 0;
2019 	bool wait_flag = false;
2020 	bool posix_lck = false;
2021 	struct cifs_sb_info *cifs_sb;
2022 	struct cifs_tcon *tcon;
2023 	struct cifsFileInfo *cfile;
2024 	__u32 type;
2025 
2026 	xid = get_xid();
2027 
2028 	if (!(fl->c.flc_flags & FL_FLOCK)) {
2029 		rc = -ENOLCK;
2030 		free_xid(xid);
2031 		return rc;
2032 	}
2033 
2034 	cfile = (struct cifsFileInfo *)file->private_data;
2035 	tcon = tlink_tcon(cfile->tlink);
2036 
2037 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2038 			tcon->ses->server);
2039 	cifs_sb = CIFS_FILE_SB(file);
2040 
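	/*
	 * POSIX byte-range locks are only attempted on Unix-extension mounts
	 * that advertise the FCNTL capability and have not disabled POSIX
	 * brlocks.
	 */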
2041 	if (cap_unix(tcon->ses) &&
2042 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2043 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2044 		posix_lck = true;
2045 
2046 	if (!lock && !unlock) {
2047 		/*
2048 		 * Neither a lock nor an unlock was requested, so there is
2049 		 * nothing to do since we do not know what this request is.
2050 		 */
2051 		rc = -EOPNOTSUPP;
2052 		free_xid(xid);
2053 		return rc;
2054 	}
2055 
2056 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2057 			xid);
2058 	free_xid(xid);
2059 	return rc;
2062 }
2063 
2064 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2065 {
2066 	int rc, xid;
2067 	int lock = 0, unlock = 0;
2068 	bool wait_flag = false;
2069 	bool posix_lck = false;
2070 	struct cifs_sb_info *cifs_sb;
2071 	struct cifs_tcon *tcon;
2072 	struct cifsFileInfo *cfile;
2073 	__u32 type;
2074 
2075 	rc = -EACCES;
2076 	xid = get_xid();
2077 
2078 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2079 		 flock->c.flc_flags, flock->c.flc_type,
2080 		 (long long)flock->fl_start,
2081 		 (long long)flock->fl_end);
2082 
2083 	cfile = (struct cifsFileInfo *)file->private_data;
2084 	tcon = tlink_tcon(cfile->tlink);
2085 
2086 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2087 			tcon->ses->server);
2088 	cifs_sb = CIFS_FILE_SB(file);
2089 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2090 
2091 	if (cap_unix(tcon->ses) &&
2092 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2093 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2094 		posix_lck = true;
2095 	/*
2096 	 * BB add code here to normalize offset and length to account for
2097 	 * negative length which we cannot accept over the wire.
2098 	 */
2099 	if (IS_GETLK(cmd)) {
2100 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2101 		free_xid(xid);
2102 		return rc;
2103 	}
2104 
2105 	if (!lock && !unlock) {
2106 		/*
2107 		 * Neither a lock nor an unlock was requested, so there is
2108 		 * nothing to do since we do not know what this request is.
2109 		 */
2110 		free_xid(xid);
2111 		return -EOPNOTSUPP;
2112 	}
2113 
2114 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2115 			xid);
2116 	free_xid(xid);
2117 	return rc;
2118 }
2119 
2120 /*
2121  * update the file size (if needed) after a write. Should be called with
2122  * the inode->i_lock held
2123  */
2124 void
2125 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2126 		      unsigned int bytes_written)
2127 {
2128 	loff_t end_of_write = offset + bytes_written;
2129 
2130 	if (end_of_write > cifsi->netfs.remote_i_size)
2131 		netfs_resize_file(&cifsi->netfs, end_of_write, true);
2132 }
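
/*
 * A minimal sketch of the expected calling pattern (mirroring the callers
 * below, e.g. cifs_write() and cifs_writev_complete()):
 *
 *	spin_lock(&inode->i_lock);
 *	cifs_update_eof(CIFS_I(inode), offset, bytes_written);
 *	spin_unlock(&inode->i_lock);
 */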
2133 
2134 static ssize_t
2135 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2136 	   size_t write_size, loff_t *offset)
2137 {
2138 	int rc = 0;
2139 	unsigned int bytes_written = 0;
2140 	unsigned int total_written;
2141 	struct cifs_tcon *tcon;
2142 	struct TCP_Server_Info *server;
2143 	unsigned int xid;
2144 	struct dentry *dentry = open_file->dentry;
2145 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2146 	struct cifs_io_parms io_parms = {0};
2147 
2148 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2149 		 write_size, *offset, dentry);
2150 
2151 	tcon = tlink_tcon(open_file->tlink);
2152 	server = tcon->ses->server;
2153 
2154 	if (!server->ops->sync_write)
2155 		return -ENOSYS;
2156 
2157 	xid = get_xid();
2158 
2159 	for (total_written = 0; write_size > total_written;
2160 	     total_written += bytes_written) {
2161 		rc = -EAGAIN;
2162 		while (rc == -EAGAIN) {
2163 			struct kvec iov[2];
2164 			unsigned int len;
2165 
2166 			if (open_file->invalidHandle) {
2167 				/* we could deadlock if we called
2168 				   filemap_fdatawait from here, so tell
2169 				   cifs_reopen_file not to flush data to
2170 				   the server now */
2171 				rc = cifs_reopen_file(open_file, false);
2172 				if (rc != 0)
2173 					break;
2174 			}
2175 
2176 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2177 				  (unsigned int)write_size - total_written);
2178 			/* iov[0] is reserved for smb header */
2179 			iov[1].iov_base = (char *)write_data + total_written;
2180 			iov[1].iov_len = len;
2181 			io_parms.pid = pid;
2182 			io_parms.tcon = tcon;
2183 			io_parms.offset = *offset;
2184 			io_parms.length = len;
2185 			rc = server->ops->sync_write(xid, &open_file->fid,
2186 					&io_parms, &bytes_written, iov, 1);
2187 		}
2188 		if (rc || (bytes_written == 0)) {
2189 			if (total_written)
2190 				break;
2191 			else {
2192 				free_xid(xid);
2193 				return rc;
2194 			}
2195 		} else {
2196 			spin_lock(&d_inode(dentry)->i_lock);
2197 			cifs_update_eof(cifsi, *offset, bytes_written);
2198 			spin_unlock(&d_inode(dentry)->i_lock);
2199 			*offset += bytes_written;
2200 		}
2201 	}
2202 
2203 	cifs_stats_bytes_written(tcon, total_written);
2204 
2205 	if (total_written > 0) {
2206 		spin_lock(&d_inode(dentry)->i_lock);
2207 		if (*offset > d_inode(dentry)->i_size) {
2208 			i_size_write(d_inode(dentry), *offset);
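			/* i_blocks is counted in 512-byte units; round up */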
2209 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2210 		}
2211 		spin_unlock(&d_inode(dentry)->i_lock);
2212 	}
2213 	mark_inode_dirty_sync(d_inode(dentry));
2214 	free_xid(xid);
2215 	return total_written;
2216 }
2217 
2218 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2219 					bool fsuid_only)
2220 {
2221 	struct cifsFileInfo *open_file = NULL;
2222 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2223 
2224 	/* only filter by fsuid on multiuser mounts */
2225 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2226 		fsuid_only = false;
2227 
2228 	spin_lock(&cifs_inode->open_file_lock);
2229 	/* we could simply take the first list entry since write-only entries
2230 	   are always at the end of the list, but the first entry might have
2231 	   a close pending, so we walk the whole list */
2232 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2233 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2234 			continue;
2235 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2236 			if (!open_file->invalidHandle) {
2237 				/* found a good file */
2238 				/* lock it so it will not be closed on us */
2239 				cifsFileInfo_get(open_file);
2240 				spin_unlock(&cifs_inode->open_file_lock);
2241 				return open_file;
2242 			} /* else might as well continue, and look for
2243 			     another, or simply have the caller reopen it
2244 			     again rather than trying to fix this handle */
2245 		} else /* write only file */
2246 			break; /* write only files are last so must be done */
2247 	}
2248 	spin_unlock(&cifs_inode->open_file_lock);
2249 	return NULL;
2250 }
2251 
2252 /* Return -EBADF if no handle is found and general rc otherwise */
2253 int
2254 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2255 		       struct cifsFileInfo **ret_file)
2256 {
2257 	struct cifsFileInfo *open_file, *inv_file = NULL;
2258 	struct cifs_sb_info *cifs_sb;
2259 	bool any_available = false;
2260 	int rc = -EBADF;
2261 	unsigned int refind = 0;
2262 	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2263 	bool with_delete = flags & FIND_WR_WITH_DELETE;
2264 	*ret_file = NULL;
2265 
2266 	/*
2267 	 * Having a null inode here (because mapping->host was set to zero by
2268 	 * the VFS or MM) should not happen but we had reports of an oops (due
2269 	 * to it being zero) during stress test cases so we need to check for it
2270 	 */
2271 
2272 	if (cifs_inode == NULL) {
2273 		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2274 		dump_stack();
2275 		return rc;
2276 	}
2277 
2278 	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2279 
2280 	/* only filter by fsuid on multiuser mounts */
2281 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2282 		fsuid_only = false;
2283 
2284 	spin_lock(&cifs_inode->open_file_lock);
2285 refind_writable:
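	/* Bound the number of attempts to revive an invalidated handle. */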
2286 	if (refind > MAX_REOPEN_ATT) {
2287 		spin_unlock(&cifs_inode->open_file_lock);
2288 		return rc;
2289 	}
2290 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2291 		if (!any_available && open_file->pid != current->tgid)
2292 			continue;
2293 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2294 			continue;
2295 		if (with_delete && !(open_file->fid.access & DELETE))
2296 			continue;
2297 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2298 			if (!open_file->invalidHandle) {
2299 				/* found a good writable file */
2300 				cifsFileInfo_get(open_file);
2301 				spin_unlock(&cifs_inode->open_file_lock);
2302 				*ret_file = open_file;
2303 				return 0;
2304 			} else {
2305 				if (!inv_file)
2306 					inv_file = open_file;
2307 			}
2308 		}
2309 	}
2310 	/* couldn't find usable FH with same pid, try any available */
2311 	if (!any_available) {
2312 		any_available = true;
2313 		goto refind_writable;
2314 	}
2315 
2316 	if (inv_file) {
2317 		any_available = false;
2318 		cifsFileInfo_get(inv_file);
2319 	}
2320 
2321 	spin_unlock(&cifs_inode->open_file_lock);
2322 
2323 	if (inv_file) {
2324 		rc = cifs_reopen_file(inv_file, false);
2325 		if (!rc) {
2326 			*ret_file = inv_file;
2327 			return 0;
2328 		}
2329 
2330 		spin_lock(&cifs_inode->open_file_lock);
2331 		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2332 		spin_unlock(&cifs_inode->open_file_lock);
2333 		cifsFileInfo_put(inv_file);
2334 		++refind;
2335 		inv_file = NULL;
2336 		spin_lock(&cifs_inode->open_file_lock);
2337 		goto refind_writable;
2338 	}
2339 
2340 	return rc;
2341 }
2342 
2343 struct cifsFileInfo *
2344 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2345 {
2346 	struct cifsFileInfo *cfile;
2347 	int rc;
2348 
2349 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2350 	if (rc)
2351 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2352 
2353 	return cfile;
2354 }
2355 
2356 int
2357 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2358 		       int flags,
2359 		       struct cifsFileInfo **ret_file)
2360 {
2361 	struct cifsFileInfo *cfile;
2362 	void *page = alloc_dentry_path();
2363 
2364 	*ret_file = NULL;
2365 
2366 	spin_lock(&tcon->open_file_lock);
2367 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2368 		struct cifsInodeInfo *cinode;
2369 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2370 		if (IS_ERR(full_path)) {
2371 			spin_unlock(&tcon->open_file_lock);
2372 			free_dentry_path(page);
2373 			return PTR_ERR(full_path);
2374 		}
2375 		if (strcmp(full_path, name))
2376 			continue;
2377 
2378 		cinode = CIFS_I(d_inode(cfile->dentry));
2379 		spin_unlock(&tcon->open_file_lock);
2380 		free_dentry_path(page);
2381 		return cifs_get_writable_file(cinode, flags, ret_file);
2382 	}
2383 
2384 	spin_unlock(&tcon->open_file_lock);
2385 	free_dentry_path(page);
2386 	return -ENOENT;
2387 }
2388 
2389 int
2390 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2391 		       struct cifsFileInfo **ret_file)
2392 {
2393 	struct cifsFileInfo *cfile;
2394 	void *page = alloc_dentry_path();
2395 
2396 	*ret_file = NULL;
2397 
2398 	spin_lock(&tcon->open_file_lock);
2399 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2400 		struct cifsInodeInfo *cinode;
2401 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2402 		if (IS_ERR(full_path)) {
2403 			spin_unlock(&tcon->open_file_lock);
2404 			free_dentry_path(page);
2405 			return PTR_ERR(full_path);
2406 		}
2407 		if (strcmp(full_path, name))
2408 			continue;
2409 
2410 		cinode = CIFS_I(d_inode(cfile->dentry));
2411 		spin_unlock(&tcon->open_file_lock);
2412 		free_dentry_path(page);
2413 		*ret_file = find_readable_file(cinode, 0);
2414 		return *ret_file ? 0 : -ENOENT;
2415 	}
2416 
2417 	spin_unlock(&tcon->open_file_lock);
2418 	free_dentry_path(page);
2419 	return -ENOENT;
2420 }
2421 
2422 void
2423 cifs_writedata_release(struct kref *refcount)
2424 {
2425 	struct cifs_writedata *wdata = container_of(refcount,
2426 					struct cifs_writedata, refcount);
2427 #ifdef CONFIG_CIFS_SMB_DIRECT
2428 	if (wdata->mr) {
2429 		smbd_deregister_mr(wdata->mr);
2430 		wdata->mr = NULL;
2431 	}
2432 #endif
2433 
2434 	if (wdata->cfile)
2435 		cifsFileInfo_put(wdata->cfile);
2436 
2437 	kfree(wdata);
2438 }
2439 
2440 /*
2441  * Write failed with a retryable error. Resend the write request. It's also
2442  * possible that the page was redirtied so re-clean the page.
2443  */
2444 static void
2445 cifs_writev_requeue(struct cifs_writedata *wdata)
2446 {
2447 	int rc = 0;
2448 	struct inode *inode = d_inode(wdata->cfile->dentry);
2449 	struct TCP_Server_Info *server;
2450 	unsigned int rest_len = wdata->bytes;
2451 	loff_t fpos = wdata->offset;
2452 
2453 	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2454 	do {
2455 		struct cifs_writedata *wdata2;
2456 		unsigned int wsize, cur_len;
2457 
2458 		wsize = server->ops->wp_retry_size(inode);
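		/*
		 * Resend in chunks no larger than the server's current retry
		 * size, keeping each chunk page-aligned where possible.
		 */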
2459 		if (wsize < rest_len) {
2460 			if (wsize < PAGE_SIZE) {
2461 				rc = -EOPNOTSUPP;
2462 				break;
2463 			}
2464 			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2465 		} else {
2466 			cur_len = rest_len;
2467 		}
2468 
2469 		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2470 		if (!wdata2) {
2471 			rc = -ENOMEM;
2472 			break;
2473 		}
2474 
2475 		wdata2->sync_mode = wdata->sync_mode;
2476 		wdata2->offset	= fpos;
2477 		wdata2->bytes	= cur_len;
2478 		wdata2->iter	= wdata->iter;
2479 
2480 		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2481 		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2482 
2483 		if (iov_iter_is_xarray(&wdata2->iter))
2484 			/* Check for pages having been redirtied and clean
2485 			 * them.  We can do this by walking the xarray.  If
2486 			 * it's not an xarray, then it's a DIO and we shouldn't
2487 			 * be mucking around with the page bits.
2488 			 */
2489 			cifs_undirty_folios(inode, fpos, cur_len);
2490 
2491 		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2492 					    &wdata2->cfile);
2493 		if (!wdata2->cfile) {
2494 			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2495 				 rc);
2496 			if (!is_retryable_error(rc))
2497 				rc = -EBADF;
2498 		} else {
2499 			wdata2->pid = wdata2->cfile->pid;
2500 			rc = server->ops->async_writev(wdata2,
2501 						       cifs_writedata_release);
2502 		}
2503 
2504 		kref_put(&wdata2->refcount, cifs_writedata_release);
2505 		if (rc) {
2506 			if (is_retryable_error(rc))
2507 				continue;
2508 			fpos += cur_len;
2509 			rest_len -= cur_len;
2510 			break;
2511 		}
2512 
2513 		fpos += cur_len;
2514 		rest_len -= cur_len;
2515 	} while (rest_len > 0);
2516 
2517 	/* Clean up remaining pages from the original wdata */
2518 	if (iov_iter_is_xarray(&wdata->iter))
2519 		cifs_pages_write_failed(inode, fpos, rest_len);
2520 
2521 	if (rc != 0 && !is_retryable_error(rc))
2522 		mapping_set_error(inode->i_mapping, rc);
2523 	kref_put(&wdata->refcount, cifs_writedata_release);
2524 }
2525 
2526 void
2527 cifs_writev_complete(struct work_struct *work)
2528 {
2529 	struct cifs_writedata *wdata = container_of(work,
2530 						struct cifs_writedata, work);
2531 	struct inode *inode = d_inode(wdata->cfile->dentry);
2532 
2533 	if (wdata->result == 0) {
2534 		spin_lock(&inode->i_lock);
2535 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2536 		spin_unlock(&inode->i_lock);
2537 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2538 					 wdata->bytes);
2539 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2540 		return cifs_writev_requeue(wdata);
2541 
2542 	if (wdata->result == -EAGAIN)
2543 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2544 	else if (wdata->result < 0)
2545 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2546 	else
2547 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2548 
2549 	if (wdata->result != -EAGAIN)
2550 		mapping_set_error(inode->i_mapping, wdata->result);
2551 	kref_put(&wdata->refcount, cifs_writedata_release);
2552 }
2553 
2554 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2555 {
2556 	struct cifs_writedata *wdata;
2557 
2558 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2559 	if (wdata != NULL) {
2560 		kref_init(&wdata->refcount);
2561 		INIT_LIST_HEAD(&wdata->list);
2562 		init_completion(&wdata->done);
2563 		INIT_WORK(&wdata->work, complete);
2564 	}
2565 	return wdata;
2566 }
2567 
2568 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2569 {
2570 	struct address_space *mapping = page->mapping;
2571 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2572 	char *write_data;
2573 	int rc = -EFAULT;
2574 	int bytes_written = 0;
2575 	struct inode *inode;
2576 	struct cifsFileInfo *open_file;
2577 
2578 	if (!mapping || !mapping->host)
2579 		return -EFAULT;
2580 
2581 	inode = page->mapping->host;
2582 
2583 	offset += (loff_t)from;
2584 	write_data = kmap(page);
2585 	write_data += from;
2586 
2587 	if ((to > PAGE_SIZE) || (from > to)) {
2588 		kunmap(page);
2589 		return -EIO;
2590 	}
2591 
2592 	/* racing with truncate? */
2593 	if (offset > mapping->host->i_size) {
2594 		kunmap(page);
2595 		return 0; /* don't care */
2596 	}
2597 
2598 	/* check to make sure that we are not extending the file */
2599 	if (mapping->host->i_size - offset < (loff_t)to)
2600 		to = (unsigned)(mapping->host->i_size - offset);
2601 
2602 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2603 				    &open_file);
2604 	if (!rc) {
2605 		bytes_written = cifs_write(open_file, open_file->pid,
2606 					   write_data, to - from, &offset);
2607 		cifsFileInfo_put(open_file);
2608 		/* Does mm or vfs already set times? */
2609 		simple_inode_init_ts(inode);
2610 		if ((bytes_written > 0) && (offset))
2611 			rc = 0;
2612 		else if (bytes_written < 0)
2613 			rc = bytes_written;
2614 		else
2615 			rc = -EFAULT;
2616 	} else {
2617 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2618 		if (!is_retryable_error(rc))
2619 			rc = -EIO;
2620 	}
2621 
2622 	kunmap(page);
2623 	return rc;
2624 }
2625 
2626 /*
2627  * Extend the region to be written back to include subsequent contiguously
2628  * dirty pages if possible, but don't sleep while doing so.
2629  */
2630 static void cifs_extend_writeback(struct address_space *mapping,
2631 				  long *_count,
2632 				  loff_t start,
2633 				  int max_pages,
2634 				  size_t max_len,
2635 				  unsigned int *_len)
2636 {
2637 	struct folio_batch batch;
2638 	struct folio *folio;
2639 	unsigned int psize, nr_pages;
2640 	size_t len = *_len;
2641 	pgoff_t index = (start + len) / PAGE_SIZE;
2642 	bool stop = true;
2643 	unsigned int i;
2644 	XA_STATE(xas, &mapping->i_pages, index);
2645 
2646 	folio_batch_init(&batch);
2647 
2648 	do {
2649 		/* Firstly, we gather up a batch of contiguous dirty pages
2650 		 * under the RCU read lock - but we can't clear the dirty flags
2651 		 * there if any of those pages are mapped.
2652 		 */
2653 		rcu_read_lock();
2654 
2655 		xas_for_each(&xas, folio, ULONG_MAX) {
2656 			stop = true;
2657 			if (xas_retry(&xas, folio))
2658 				continue;
2659 			if (xa_is_value(folio))
2660 				break;
2661 			if (folio->index != index)
2662 				break;
2663 			if (!folio_try_get_rcu(folio)) {
2664 				xas_reset(&xas);
2665 				continue;
2666 			}
2667 			nr_pages = folio_nr_pages(folio);
2668 			if (nr_pages > max_pages)
2669 				break;
2670 
2671 			/* Has the page moved or been split? */
2672 			if (unlikely(folio != xas_reload(&xas))) {
2673 				folio_put(folio);
2674 				break;
2675 			}
2676 
2677 			if (!folio_trylock(folio)) {
2678 				folio_put(folio);
2679 				break;
2680 			}
2681 			if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
2682 				folio_unlock(folio);
2683 				folio_put(folio);
2684 				break;
2685 			}
2686 
2687 			max_pages -= nr_pages;
2688 			psize = folio_size(folio);
2689 			len += psize;
2690 			stop = false;
2691 			if (max_pages <= 0 || len >= max_len || *_count <= 0)
2692 				stop = true;
2693 
2694 			index += nr_pages;
2695 			if (!folio_batch_add(&batch, folio))
2696 				break;
2697 			if (stop)
2698 				break;
2699 		}
2700 
2701 		if (!stop)
2702 			xas_pause(&xas);
2703 		rcu_read_unlock();
2704 
2705 		/* Now, if we obtained any pages, we can shift them to being
2706 		 * writable and mark them for caching.
2707 		 */
2708 		if (!folio_batch_count(&batch))
2709 			break;
2710 
2711 		for (i = 0; i < folio_batch_count(&batch); i++) {
2712 			folio = batch.folios[i];
2713 			/* The folio should be locked, dirty and not undergoing
2714 			 * writeback from the loop above.
2715 			 */
2716 			if (!folio_clear_dirty_for_io(folio))
2717 				WARN_ON(1);
2718 			folio_start_writeback(folio);
2719 
2720 			*_count -= folio_nr_pages(folio);
2721 			folio_unlock(folio);
2722 		}
2723 
2724 		folio_batch_release(&batch);
2725 		cond_resched();
2726 	} while (!stop);
2727 
2728 	*_len = len;
2729 }
2730 
2731 /*
2732  * Write back the locked page and any subsequent non-locked dirty pages.
2733  */
2734 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2735 						 struct writeback_control *wbc,
2736 						 struct folio *folio,
2737 						 loff_t start, loff_t end)
2738 {
2739 	struct inode *inode = mapping->host;
2740 	struct TCP_Server_Info *server;
2741 	struct cifs_writedata *wdata;
2742 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2743 	struct cifs_credits credits_on_stack;
2744 	struct cifs_credits *credits = &credits_on_stack;
2745 	struct cifsFileInfo *cfile = NULL;
2746 	unsigned int xid, wsize, len;
2747 	loff_t i_size = i_size_read(inode);
2748 	size_t max_len;
2749 	long count = wbc->nr_to_write;
2750 	int rc;
2751 
2752 	/* The folio should be locked, dirty and not undergoing writeback. */
2753 	folio_start_writeback(folio);
2754 
2755 	count -= folio_nr_pages(folio);
2756 	len = folio_size(folio);
2757 
2758 	xid = get_xid();
2759 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2760 
2761 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2762 	if (rc) {
2763 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2764 		goto err_xid;
2765 	}
2766 
2767 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2768 					   &wsize, credits);
2769 	if (rc != 0)
2770 		goto err_close;
2771 
2772 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2773 	if (!wdata) {
2774 		rc = -ENOMEM;
2775 		goto err_uncredit;
2776 	}
2777 
2778 	wdata->sync_mode = wbc->sync_mode;
2779 	wdata->offset = folio_pos(folio);
2780 	wdata->pid = cfile->pid;
2781 	wdata->credits = credits_on_stack;
2782 	wdata->cfile = cfile;
2783 	wdata->server = server;
2784 	cfile = NULL;
2785 
2786 	/* Find all consecutive lockable dirty pages, stopping when we find a
2787 	 * page that is not immediately lockable, is not dirty or is missing,
2788 	 * or we reach the end of the range.
2789 	 */
2790 	if (start < i_size) {
2791 		/* Trim the write to the EOF; the extra data is ignored.  Also
2792 		 * put an upper limit on the size of a single write op.
2793 		 */
2794 		max_len = wsize;
2795 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2796 		max_len = min_t(unsigned long long, max_len, i_size - start);
2797 
2798 		if (len < max_len) {
2799 			int max_pages = INT_MAX;
2800 
2801 #ifdef CONFIG_CIFS_SMB_DIRECT
2802 			if (server->smbd_conn)
2803 				max_pages = server->smbd_conn->max_frmr_depth;
2804 #endif
2805 			max_pages -= folio_nr_pages(folio);
2806 
2807 			if (max_pages > 0)
2808 				cifs_extend_writeback(mapping, &count, start,
2809 						      max_pages, max_len, &len);
2810 		}
2811 		len = min_t(loff_t, len, max_len);
2812 	}
2813 
2814 	wdata->bytes = len;
2815 
2816 	/* We now have a contiguous set of dirty pages, each with writeback
2817 	 * set; the first page is still locked at this point, but all the rest
2818 	 * have been unlocked.
2819 	 */
2820 	folio_unlock(folio);
2821 
2822 	if (start < i_size) {
2823 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2824 				start, len);
2825 
2826 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2827 		if (rc)
2828 			goto err_wdata;
2829 
2830 		if (wdata->cfile->invalidHandle)
2831 			rc = -EAGAIN;
2832 		else
2833 			rc = wdata->server->ops->async_writev(wdata,
2834 							      cifs_writedata_release);
2835 		if (rc >= 0) {
2836 			kref_put(&wdata->refcount, cifs_writedata_release);
2837 			goto err_close;
2838 		}
2839 	} else {
2840 		/* The dirty region was entirely beyond the EOF. */
2841 		cifs_pages_written_back(inode, start, len);
2842 		rc = 0;
2843 	}
2844 
2845 err_wdata:
2846 	kref_put(&wdata->refcount, cifs_writedata_release);
2847 err_uncredit:
2848 	add_credits_and_wake_if(server, credits, 0);
2849 err_close:
2850 	if (cfile)
2851 		cifsFileInfo_put(cfile);
2852 err_xid:
2853 	free_xid(xid);
2854 	if (rc == 0) {
2855 		wbc->nr_to_write = count;
2856 		rc = len;
2857 	} else if (is_retryable_error(rc)) {
2858 		cifs_pages_write_redirty(inode, start, len);
2859 	} else {
2860 		cifs_pages_write_failed(inode, start, len);
2861 		mapping_set_error(mapping, rc);
2862 	}
2863 	/* Indication to update ctime and mtime as close is deferred */
2864 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2865 	return rc;
2866 }
2867 
2868 /*
2869  * write a region of pages back to the server
2870  */
2871 static int cifs_writepages_region(struct address_space *mapping,
2872 				  struct writeback_control *wbc,
2873 				  loff_t start, loff_t end, loff_t *_next)
2874 {
2875 	struct folio_batch fbatch;
2876 	int skips = 0;
2877 
2878 	folio_batch_init(&fbatch);
2879 	do {
2880 		int nr;
2881 		pgoff_t index = start / PAGE_SIZE;
2882 
2883 		nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
2884 					    PAGECACHE_TAG_DIRTY, &fbatch);
2885 		if (!nr)
2886 			break;
2887 
2888 		for (int i = 0; i < nr; i++) {
2889 			ssize_t ret;
2890 			struct folio *folio = fbatch.folios[i];
2891 
2892 redo_folio:
2893 			start = folio_pos(folio); /* May regress with THPs */
2894 
2895 			/* At this point we hold neither the i_pages lock nor the
2896 			 * page lock: the page may be truncated or invalidated
2897 			 * (changing page->mapping to NULL), or even swizzled
2898 			 * back from swapper_space to tmpfs file mapping
2899 			 */
2900 			if (wbc->sync_mode != WB_SYNC_NONE) {
2901 				ret = folio_lock_killable(folio);
2902 				if (ret < 0)
2903 					goto write_error;
2904 			} else {
2905 				if (!folio_trylock(folio))
2906 					goto skip_write;
2907 			}
2908 
2909 			if (folio->mapping != mapping ||
2910 			    !folio_test_dirty(folio)) {
2911 				start += folio_size(folio);
2912 				folio_unlock(folio);
2913 				continue;
2914 			}
2915 
2916 			if (folio_test_writeback(folio) ||
2917 			    folio_test_fscache(folio)) {
2918 				folio_unlock(folio);
2919 				if (wbc->sync_mode == WB_SYNC_NONE)
2920 					goto skip_write;
2921 
2922 				folio_wait_writeback(folio);
2923 #ifdef CONFIG_CIFS_FSCACHE
2924 				folio_wait_fscache(folio);
2925 #endif
2926 				goto redo_folio;
2927 			}
2928 
2929 			if (!folio_clear_dirty_for_io(folio))
2930 				/* We hold the page lock - it should've been dirty. */
2931 				WARN_ON(1);
2932 
2933 			ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
2934 			if (ret < 0)
2935 				goto write_error;
2936 
2937 			start += ret;
2938 			continue;
2939 
2940 write_error:
2941 			folio_batch_release(&fbatch);
2942 			*_next = start;
2943 			return ret;
2944 
2945 skip_write:
2946 			/*
2947 			 * Too many skipped writes, or need to reschedule?
2948 			 * Treat it as a write error without an error code.
2949 			 */
2950 			if (skips >= 5 || need_resched()) {
2951 				ret = 0;
2952 				goto write_error;
2953 			}
2954 
2955 			/* Otherwise, just skip that folio and go on to the next */
2956 			skips++;
2957 			start += folio_size(folio);
2958 			continue;
2959 		}
2960 
2961 		folio_batch_release(&fbatch);
2962 		cond_resched();
2963 	} while (wbc->nr_to_write > 0);
2964 
2965 	*_next = start;
2966 	return 0;
2967 }
2968 
2969 /*
2970  * Write some of the pending data back to the server
2971  */
2972 static int cifs_writepages(struct address_space *mapping,
2973 			   struct writeback_control *wbc)
2974 {
2975 	loff_t start, next;
2976 	int ret;
2977 
2978 	/* We have to be careful as we can end up racing with setattr()
2979 	 * truncating the pagecache since the caller doesn't take a lock here
2980 	 * to prevent it.
2981 	 */
2982 
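	/*
	 * For cyclic writeback, start at writeback_index and, if there is
	 * still quota left, wrap around to the start of the file.
	 */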
2983 	if (wbc->range_cyclic) {
2984 		start = mapping->writeback_index * PAGE_SIZE;
2985 		ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
2986 		if (ret == 0) {
2987 			mapping->writeback_index = next / PAGE_SIZE;
2988 			if (start > 0 && wbc->nr_to_write > 0) {
2989 				ret = cifs_writepages_region(mapping, wbc, 0,
2990 							     start, &next);
2991 				if (ret == 0)
2992 					mapping->writeback_index =
2993 						next / PAGE_SIZE;
2994 			}
2995 		}
2996 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
2997 		ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
2998 		if (wbc->nr_to_write > 0 && ret == 0)
2999 			mapping->writeback_index = next / PAGE_SIZE;
3000 	} else {
3001 		ret = cifs_writepages_region(mapping, wbc,
3002 					     wbc->range_start, wbc->range_end, &next);
3003 	}
3004 
3005 	return ret;
3006 }
3007 
3008 static int
3009 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3010 {
3011 	int rc;
3012 	unsigned int xid;
3013 
3014 	xid = get_xid();
3015 /* BB add check for wbc flags */
3016 	get_page(page);
3017 	if (!PageUptodate(page))
3018 		cifs_dbg(FYI, "ppw - page not up to date\n");
3019 
3020 	/*
3021 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3022 	 *
3023 	 * A writepage() implementation always needs to do either this,
3024 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3025 	 * the case of a failure.
3026 	 *
3027 	 * Just unlocking the page will cause the radix tree tag-bits
3028 	 * to fail to update with the state of the page correctly.
3029 	 */
3030 	set_page_writeback(page);
3031 retry_write:
3032 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3033 	if (is_retryable_error(rc)) {
3034 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3035 			goto retry_write;
3036 		redirty_page_for_writepage(wbc, page);
3037 	} else if (rc != 0) {
3038 		SetPageError(page);
3039 		mapping_set_error(page->mapping, rc);
3040 	} else {
3041 		SetPageUptodate(page);
3042 	}
3043 	end_page_writeback(page);
3044 	put_page(page);
3045 	free_xid(xid);
3046 	return rc;
3047 }
3048 
3049 static int cifs_write_end(struct file *file, struct address_space *mapping,
3050 			loff_t pos, unsigned len, unsigned copied,
3051 			struct page *page, void *fsdata)
3052 {
3053 	int rc;
3054 	struct inode *inode = mapping->host;
3055 	struct cifsFileInfo *cfile = file->private_data;
3056 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3057 	struct folio *folio = page_folio(page);
3058 	__u32 pid;
3059 
3060 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3061 		pid = cfile->pid;
3062 	else
3063 		pid = current->tgid;
3064 
3065 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3066 		 page, pos, copied);
3067 
3068 	if (folio_test_checked(folio)) {
3069 		if (copied == len)
3070 			folio_mark_uptodate(folio);
3071 		folio_clear_checked(folio);
3072 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3073 		folio_mark_uptodate(folio);
3074 
3075 	if (!folio_test_uptodate(folio)) {
3076 		char *page_data;
3077 		unsigned offset = pos & (PAGE_SIZE - 1);
3078 		unsigned int xid;
3079 
3080 		xid = get_xid();
3081 		/* this is probably better than directly calling
3082 		   cifs_partialpagewrite since here the file handle is
3083 		   known, which we might as well leverage */
3084 		/* BB check if anything else is missing out of ppw,
3085 		   such as updating the last write time */
3086 		page_data = kmap(page);
3087 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3088 		/* if (rc < 0) should we set writebehind rc? */
3089 		kunmap(page);
3090 
3091 		free_xid(xid);
3092 	} else {
3093 		rc = copied;
3094 		pos += copied;
3095 		set_page_dirty(page);
3096 	}
3097 
3098 	if (rc > 0) {
3099 		spin_lock(&inode->i_lock);
3100 		if (pos > inode->i_size) {
3101 			i_size_write(inode, pos);
3102 			inode->i_blocks = (512 - 1 + pos) >> 9;
3103 		}
3104 		spin_unlock(&inode->i_lock);
3105 	}
3106 
3107 	unlock_page(page);
3108 	put_page(page);
3109 	/* Indication to update ctime and mtime as close is deferred */
3110 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3111 
3112 	return rc;
3113 }
3114 
3115 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3116 		      int datasync)
3117 {
3118 	unsigned int xid;
3119 	int rc = 0;
3120 	struct cifs_tcon *tcon;
3121 	struct TCP_Server_Info *server;
3122 	struct cifsFileInfo *smbfile = file->private_data;
3123 	struct inode *inode = file_inode(file);
3124 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3125 
3126 	rc = file_write_and_wait_range(file, start, end);
3127 	if (rc) {
3128 		trace_cifs_fsync_err(inode->i_ino, rc);
3129 		return rc;
3130 	}
3131 
3132 	xid = get_xid();
3133 
3134 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3135 		 file, datasync);
3136 
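	/*
	 * Without read caching rights (oplock/lease) the pagecache may be
	 * stale, so drop it and let subsequent reads go to the server.
	 */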
3137 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3138 		rc = cifs_zap_mapping(inode);
3139 		if (rc) {
3140 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3141 			rc = 0; /* don't care about it in fsync */
3142 		}
3143 	}
3144 
3145 	tcon = tlink_tcon(smbfile->tlink);
3146 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3147 		server = tcon->ses->server;
3148 		if (server->ops->flush == NULL) {
3149 			rc = -ENOSYS;
3150 			goto strict_fsync_exit;
3151 		}
3152 
3153 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3154 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3155 			if (smbfile) {
3156 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3157 				cifsFileInfo_put(smbfile);
3158 			} else
3159 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3160 		} else
3161 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3162 	}
3163 
3164 strict_fsync_exit:
3165 	free_xid(xid);
3166 	return rc;
3167 }
3168 
3169 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3170 {
3171 	unsigned int xid;
3172 	int rc = 0;
3173 	struct cifs_tcon *tcon;
3174 	struct TCP_Server_Info *server;
3175 	struct cifsFileInfo *smbfile = file->private_data;
3176 	struct inode *inode = file_inode(file);
3177 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3178 
3179 	rc = file_write_and_wait_range(file, start, end);
3180 	if (rc) {
3181 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3182 		return rc;
3183 	}
3184 
3185 	xid = get_xid();
3186 
3187 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3188 		 file, datasync);
3189 
3190 	tcon = tlink_tcon(smbfile->tlink);
3191 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3192 		server = tcon->ses->server;
3193 		if (server->ops->flush == NULL) {
3194 			rc = -ENOSYS;
3195 			goto fsync_exit;
3196 		}
3197 
3198 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3199 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3200 			if (smbfile) {
3201 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3202 				cifsFileInfo_put(smbfile);
3203 			} else
3204 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3205 		} else
3206 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3207 	}
3208 
3209 fsync_exit:
3210 	free_xid(xid);
3211 	return rc;
3212 }
3213 
3214 /*
3215  * As the file closes, flush all cached write data for this inode, checking
3216  * for write-behind errors.
3217  */
3218 int cifs_flush(struct file *file, fl_owner_t id)
3219 {
3220 	struct inode *inode = file_inode(file);
3221 	int rc = 0;
3222 
3223 	if (file->f_mode & FMODE_WRITE)
3224 		rc = filemap_write_and_wait(inode->i_mapping);
3225 
3226 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3227 	if (rc) {
3228 		/* get more nuanced writeback errors */
3229 		rc = filemap_check_wb_err(file->f_mapping, 0);
3230 		trace_cifs_flush_err(inode->i_ino, rc);
3231 	}
3232 	return rc;
3233 }
3234 
3235 static void
3236 cifs_uncached_writedata_release(struct kref *refcount)
3237 {
3238 	struct cifs_writedata *wdata = container_of(refcount,
3239 					struct cifs_writedata, refcount);
3240 
3241 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3242 	cifs_writedata_release(refcount);
3243 }
3244 
3245 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3246 
3247 static void
3248 cifs_uncached_writev_complete(struct work_struct *work)
3249 {
3250 	struct cifs_writedata *wdata = container_of(work,
3251 					struct cifs_writedata, work);
3252 	struct inode *inode = d_inode(wdata->cfile->dentry);
3253 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3254 
3255 	spin_lock(&inode->i_lock);
3256 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3257 	if (cifsi->netfs.remote_i_size > inode->i_size)
3258 		i_size_write(inode, cifsi->netfs.remote_i_size);
3259 	spin_unlock(&inode->i_lock);
3260 
3261 	complete(&wdata->done);
3262 	collect_uncached_write_data(wdata->ctx);
3263 	/* the below call can possibly free the last ref to aio ctx */
3264 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3265 }
3266 
3267 static int
3268 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3269 	struct cifs_aio_ctx *ctx)
3270 {
3271 	unsigned int wsize;
3272 	struct cifs_credits credits;
3273 	int rc;
3274 	struct TCP_Server_Info *server = wdata->server;
3275 
3276 	do {
3277 		if (wdata->cfile->invalidHandle) {
3278 			rc = cifs_reopen_file(wdata->cfile, false);
3279 			if (rc == -EAGAIN)
3280 				continue;
3281 			else if (rc)
3282 				break;
3283 		}
3284 
3286 		/*
3287 		 * Wait for credits to resend this wdata.
3288 		 * Note: we are attempting to resend the whole wdata, not in
3289 		 * segments.
3290 		 */
3291 		do {
3292 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3293 						&wsize, &credits);
3294 			if (rc)
3295 				goto fail;
3296 
3297 			if (wsize < wdata->bytes) {
3298 				add_credits_and_wake_if(server, &credits, 0);
3299 				msleep(1000);
3300 			}
3301 		} while (wsize < wdata->bytes);
3302 		wdata->credits = credits;
3303 
3304 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3305 
3306 		if (!rc) {
3307 			if (wdata->cfile->invalidHandle)
3308 				rc = -EAGAIN;
3309 			else {
3310 				wdata->replay = true;
3311 #ifdef CONFIG_CIFS_SMB_DIRECT
3312 				if (wdata->mr) {
3313 					wdata->mr->need_invalidate = true;
3314 					smbd_deregister_mr(wdata->mr);
3315 					wdata->mr = NULL;
3316 				}
3317 #endif
3318 				rc = server->ops->async_writev(wdata,
3319 					cifs_uncached_writedata_release);
3320 			}
3321 		}
3322 
3323 		/* If the write was successfully sent, we are done */
3324 		if (!rc) {
3325 			list_add_tail(&wdata->list, wdata_list);
3326 			return 0;
3327 		}
3328 
3329 		/* Roll back credits and retry if needed */
3330 		add_credits_and_wake_if(server, &wdata->credits, 0);
3331 	} while (rc == -EAGAIN);
3332 
3333 fail:
3334 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3335 	return rc;
3336 }
3337 
3338 /*
3339  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3340  * size and maximum number of segments.
3341  */
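/*
 * A small worked example with hypothetical numbers: given two bvecs of
 * 3000 and 5000 bytes, iov_offset = 1000, count = 7000, max_size = 4096
 * and max_segs = 8, the span covers the last 2000 bytes of the first bvec
 * plus the first 2096 bytes of the second, so the function would return
 * 4096 with *_nsegs set to 2.
 */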
3342 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3343 				     size_t max_segs, unsigned int *_nsegs)
3344 {
3345 	const struct bio_vec *bvecs = iter->bvec;
3346 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3347 	size_t len, span = 0, n = iter->count;
3348 	size_t skip = iter->iov_offset;
3349 
3350 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3351 		return 0;
3352 
3353 	while (n && ix < nbv && skip) {
3354 		len = bvecs[ix].bv_len;
3355 		if (skip < len)
3356 			break;
3357 		skip -= len;
3358 		n -= len;
3359 		ix++;
3360 	}
3361 
3362 	while (n && ix < nbv) {
3363 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3364 		span += len;
3365 		max_size -= len;
3366 		nsegs++;
3367 		ix++;
3368 		if (max_size == 0 || nsegs >= max_segs)
3369 			break;
3370 		skip = 0;
3371 		n -= len;
3372 	}
3373 
3374 	*_nsegs = nsegs;
3375 	return span;
3376 }
3377 
3378 static int
3379 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3380 		     struct cifsFileInfo *open_file,
3381 		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3382 		     struct cifs_aio_ctx *ctx)
3383 {
3384 	int rc = 0;
3385 	size_t cur_len, max_len;
3386 	struct cifs_writedata *wdata;
3387 	pid_t pid;
3388 	struct TCP_Server_Info *server;
3389 	unsigned int xid, max_segs = INT_MAX;
3390 
3391 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3392 		pid = open_file->pid;
3393 	else
3394 		pid = current->tgid;
3395 
3396 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3397 	xid = get_xid();
3398 
3399 #ifdef CONFIG_CIFS_SMB_DIRECT
3400 	if (server->smbd_conn)
3401 		max_segs = server->smbd_conn->max_frmr_depth;
3402 #endif
3403 
3404 	do {
3405 		struct cifs_credits credits_on_stack;
3406 		struct cifs_credits *credits = &credits_on_stack;
3407 		unsigned int wsize, nsegs = 0;
3408 
3409 		if (signal_pending(current)) {
3410 			rc = -EINTR;
3411 			break;
3412 		}
3413 
3414 		if (open_file->invalidHandle) {
3415 			rc = cifs_reopen_file(open_file, false);
3416 			if (rc == -EAGAIN)
3417 				continue;
3418 			else if (rc)
3419 				break;
3420 		}
3421 
3422 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3423 						   &wsize, credits);
3424 		if (rc)
3425 			break;
3426 
3427 		max_len = min_t(const size_t, len, wsize);
3428 		if (!max_len) {
3429 			rc = -EAGAIN;
3430 			add_credits_and_wake_if(server, credits, 0);
3431 			break;
3432 		}
3433 
3434 		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3435 		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3436 			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3437 		if (cur_len == 0) {
3438 			rc = -EIO;
3439 			add_credits_and_wake_if(server, credits, 0);
3440 			break;
3441 		}
3442 
3443 		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3444 		if (!wdata) {
3445 			rc = -ENOMEM;
3446 			add_credits_and_wake_if(server, credits, 0);
3447 			break;
3448 		}
3449 
3450 		wdata->sync_mode = WB_SYNC_ALL;
3451 		wdata->offset	= (__u64)fpos;
3452 		wdata->cfile	= cifsFileInfo_get(open_file);
3453 		wdata->server	= server;
3454 		wdata->pid	= pid;
3455 		wdata->bytes	= cur_len;
3456 		wdata->credits	= credits_on_stack;
3457 		wdata->iter	= *from;
3458 		wdata->ctx	= ctx;
3459 		kref_get(&ctx->refcount);
3460 
3461 		iov_iter_truncate(&wdata->iter, cur_len);
3462 
3463 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3464 
3465 		if (!rc) {
3466 			if (wdata->cfile->invalidHandle)
3467 				rc = -EAGAIN;
3468 			else
3469 				rc = server->ops->async_writev(wdata,
3470 					cifs_uncached_writedata_release);
3471 		}
3472 
3473 		if (rc) {
3474 			add_credits_and_wake_if(server, &wdata->credits, 0);
3475 			kref_put(&wdata->refcount,
3476 				 cifs_uncached_writedata_release);
3477 			if (rc == -EAGAIN)
3478 				continue;
3479 			break;
3480 		}
3481 
3482 		list_add_tail(&wdata->list, wdata_list);
3483 		iov_iter_advance(from, cur_len);
3484 		fpos += cur_len;
3485 		len -= cur_len;
3486 	} while (len > 0);
3487 
3488 	free_xid(xid);
3489 	return rc;
3490 }
3491 
3492 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3493 {
3494 	struct cifs_writedata *wdata, *tmp;
3495 	struct cifs_tcon *tcon;
3496 	struct cifs_sb_info *cifs_sb;
3497 	struct dentry *dentry = ctx->cfile->dentry;
3498 	ssize_t rc;
3499 
3500 	tcon = tlink_tcon(ctx->cfile->tlink);
3501 	cifs_sb = CIFS_SB(dentry->d_sb);
3502 
3503 	mutex_lock(&ctx->aio_mutex);
3504 
3505 	if (list_empty(&ctx->list)) {
3506 		mutex_unlock(&ctx->aio_mutex);
3507 		return;
3508 	}
3509 
3510 	rc = ctx->rc;
3511 	/*
3512 	 * Wait for and collect replies for any successful sends in order of
3513 	 * increasing offset. Once an error is hit, then return without waiting
3514 	 * for any more replies.
3515 	 */
3516 restart_loop:
3517 	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3518 		if (!rc) {
3519 			if (!try_wait_for_completion(&wdata->done)) {
3520 				mutex_unlock(&ctx->aio_mutex);
3521 				return;
3522 			}
3523 
3524 			if (wdata->result)
3525 				rc = wdata->result;
3526 			else
3527 				ctx->total_len += wdata->bytes;
3528 
3529 			/* resend call if it's a retryable error */
3530 			if (rc == -EAGAIN) {
3531 				struct list_head tmp_list;
3532 				struct iov_iter tmp_from = ctx->iter;
3533 
3534 				INIT_LIST_HEAD(&tmp_list);
3535 				list_del_init(&wdata->list);
3536 
3537 				if (ctx->direct_io)
3538 					rc = cifs_resend_wdata(
3539 						wdata, &tmp_list, ctx);
3540 				else {
3541 					iov_iter_advance(&tmp_from,
3542 						 wdata->offset - ctx->pos);
3543 
3544 					rc = cifs_write_from_iter(wdata->offset,
3545 						wdata->bytes, &tmp_from,
3546 						ctx->cfile, cifs_sb, &tmp_list,
3547 						ctx);
3548 
3549 					kref_put(&wdata->refcount,
3550 						cifs_uncached_writedata_release);
3551 				}
3552 
3553 				list_splice(&tmp_list, &ctx->list);
3554 				goto restart_loop;
3555 			}
3556 		}
3557 		list_del_init(&wdata->list);
3558 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3559 	}
3560 
3561 	cifs_stats_bytes_written(tcon, ctx->total_len);
3562 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3563 
3564 	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3565 
3566 	mutex_unlock(&ctx->aio_mutex);
3567 
3568 	if (ctx->iocb && ctx->iocb->ki_complete)
3569 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3570 	else
3571 		complete(&ctx->done);
3572 }
3573 
3574 static ssize_t __cifs_writev(
3575 	struct kiocb *iocb, struct iov_iter *from, bool direct)
3576 {
3577 	struct file *file = iocb->ki_filp;
3578 	ssize_t total_written = 0;
3579 	struct cifsFileInfo *cfile;
3580 	struct cifs_tcon *tcon;
3581 	struct cifs_sb_info *cifs_sb;
3582 	struct cifs_aio_ctx *ctx;
3583 	int rc;
3584 
3585 	rc = generic_write_checks(iocb, from);
3586 	if (rc <= 0)
3587 		return rc;
3588 
3589 	cifs_sb = CIFS_FILE_SB(file);
3590 	cfile = file->private_data;
3591 	tcon = tlink_tcon(cfile->tlink);
3592 
3593 	if (!tcon->ses->server->ops->async_writev)
3594 		return -ENOSYS;
3595 
3596 	ctx = cifs_aio_ctx_alloc();
3597 	if (!ctx)
3598 		return -ENOMEM;
3599 
3600 	ctx->cfile = cifsFileInfo_get(cfile);
3601 
3602 	if (!is_sync_kiocb(iocb))
3603 		ctx->iocb = iocb;
3604 
3605 	ctx->pos = iocb->ki_pos;
3606 	ctx->direct_io = direct;
3607 	ctx->nr_pinned_pages = 0;
3608 
3609 	if (user_backed_iter(from)) {
3610 		/*
3611 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3612 		 * they contain references to the calling process's virtual
3613 		 * memory layout which won't be available in an async worker
3614 		 * thread.  This also takes a pin on every folio involved.
3615 		 */
3616 		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3617 					     &ctx->iter, 0);
3618 		if (rc < 0) {
3619 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3620 			return rc;
3621 		}
3622 
3623 		ctx->nr_pinned_pages = rc;
3624 		ctx->bv = (void *)ctx->iter.bvec;
3625 		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3626 	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3627 		   !is_sync_kiocb(iocb)) {
3628 		/*
3629 		 * If the op is asynchronous, we need to copy the list attached
3630 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3631 		 * will be pinned by the caller; in any case, we may or may not
3632 		 * be able to pin the pages, so we don't try.
3633 		 */
3634 		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3635 		if (!ctx->bv) {
3636 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3637 			return -ENOMEM;
3638 		}
3639 	} else {
3640 		/*
3641 		 * Otherwise, we just pass the iterator down as-is and rely on
3642 		 * the caller to make sure the pages referred to by the
3643 		 * iterator don't evaporate.
3644 		 */
3645 		ctx->iter = *from;
3646 	}
3647 
3648 	ctx->len = iov_iter_count(&ctx->iter);
3649 
3650 	/* grab the lock here because write response handlers can access ctx */
3651 	mutex_lock(&ctx->aio_mutex);
3652 
3653 	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3654 				  cfile, cifs_sb, &ctx->list, ctx);
3655 
3656 	/*
3657 	 * If at least one write was successfully sent, discard any rc value
3658 	 * from the later writes.  If the remaining writes succeed we'll end up
3659 	 * returning whatever was written; if they fail we'll pick up a fresh
3660 	 * rc value from them.
3661 	 */
3662 	if (!list_empty(&ctx->list))
3663 		rc = 0;
3664 
3665 	mutex_unlock(&ctx->aio_mutex);
3666 
3667 	if (rc) {
3668 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3669 		return rc;
3670 	}
3671 
3672 	if (!is_sync_kiocb(iocb)) {
3673 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3674 		return -EIOCBQUEUED;
3675 	}
3676 
3677 	rc = wait_for_completion_killable(&ctx->done);
3678 	if (rc) {
3679 		mutex_lock(&ctx->aio_mutex);
3680 		ctx->rc = rc = -EINTR;
3681 		total_written = ctx->total_len;
3682 		mutex_unlock(&ctx->aio_mutex);
3683 	} else {
3684 		rc = ctx->rc;
3685 		total_written = ctx->total_len;
3686 	}
3687 
3688 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3689 
3690 	if (unlikely(!total_written))
3691 		return rc;
3692 
3693 	iocb->ki_pos += total_written;
3694 	return total_written;
3695 }
3696 
3697 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3698 {
3699 	struct file *file = iocb->ki_filp;
3700 
3701 	cifs_revalidate_mapping(file->f_inode);
3702 	return __cifs_writev(iocb, from, true);
3703 }
3704 
3705 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3706 {
3707 	return __cifs_writev(iocb, from, false);
3708 }
3709 
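/*
 * Cached write path used when we hold a write oplock/lease: check for a
 * conflicting byte-range lock and, if there is none, write through the page
 * cache with __generic_file_write_iter() and then sync if required.
 */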
3710 static ssize_t
3711 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3712 {
3713 	struct file *file = iocb->ki_filp;
3714 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3715 	struct inode *inode = file->f_mapping->host;
3716 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3717 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3718 	ssize_t rc;
3719 
3720 	inode_lock(inode);
3721 	/*
3722 	 * We need to hold the sem to be sure nobody modifies the lock list
3723 	 * with a brlock that prevents writing.
3724 	 */
3725 	down_read(&cinode->lock_sem);
3726 
3727 	rc = generic_write_checks(iocb, from);
3728 	if (rc <= 0)
3729 		goto out;
3730 
3731 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3732 				     server->vals->exclusive_lock_type, 0,
3733 				     NULL, CIFS_WRITE_OP))
3734 		rc = __generic_file_write_iter(iocb, from);
3735 	else
3736 		rc = -EACCES;
3737 out:
3738 	up_read(&cinode->lock_sem);
3739 	inode_unlock(inode);
3740 
3741 	if (rc > 0)
3742 		rc = generic_write_sync(iocb, rc);
3743 	return rc;
3744 }
3745 
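/*
 * Strict cache mode ->write_iter path: write through the page cache when we
 * hold a write oplock/lease, otherwise do an uncached write and then drop
 * any read-cached data so later reads go back to the server.
 */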
3746 ssize_t
3747 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3748 {
3749 	struct inode *inode = file_inode(iocb->ki_filp);
3750 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3751 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3752 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3753 						iocb->ki_filp->private_data;
3754 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3755 	ssize_t written;
3756 
3757 	written = cifs_get_writer(cinode);
3758 	if (written)
3759 		return written;
3760 
3761 	if (CIFS_CACHE_WRITE(cinode)) {
3762 		if (cap_unix(tcon->ses) &&
3763 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3764 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3765 			written = generic_file_write_iter(iocb, from);
3766 			goto out;
3767 		}
3768 		written = cifs_writev(iocb, from);
3769 		goto out;
3770 	}
3771 	/*
3772 	 * For non-oplocked files in strict cache mode we need to write the data
3773 	 * to the server exactly from pos to pos+len-1 rather than flush all
3774 	 * affected pages, because flushing may cause an error with mandatory
3775 	 * locks on those pages but not on the region from pos to pos+len-1.
3776 	 */
3777 	written = cifs_user_writev(iocb, from);
3778 	if (CIFS_CACHE_READ(cinode)) {
3779 		/*
3780 		 * We have read level caching and we have just sent a write
3781 		 * request to the server thus making data in the cache stale.
3782 		 * Zap the cache and set oplock/lease level to NONE to avoid
3783 		 * reading stale data from the cache. All subsequent read
3784 		 * operations will read new data from the server.
3785 		 */
3786 		cifs_zap_mapping(inode);
3787 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3788 			 inode);
3789 		cinode->oplock = 0;
3790 	}
3791 out:
3792 	cifs_put_writer(cinode);
3793 	return written;
3794 }
3795 
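/*
 * Allocate a cifs_readdata descriptor for one slice of a read request and
 * initialise its refcount, list linkage, completion and work item.
 */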
3796 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3797 {
3798 	struct cifs_readdata *rdata;
3799 
3800 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3801 	if (rdata) {
3802 		kref_init(&rdata->refcount);
3803 		INIT_LIST_HEAD(&rdata->list);
3804 		init_completion(&rdata->done);
3805 		INIT_WORK(&rdata->work, complete);
3806 	}
3807 
3808 	return rdata;
3809 }
3810 
3811 void
3812 cifs_readdata_release(struct kref *refcount)
3813 {
3814 	struct cifs_readdata *rdata = container_of(refcount,
3815 					struct cifs_readdata, refcount);
3816 
3817 	if (rdata->ctx)
3818 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3819 #ifdef CONFIG_CIFS_SMB_DIRECT
3820 	if (rdata->mr) {
3821 		smbd_deregister_mr(rdata->mr);
3822 		rdata->mr = NULL;
3823 	}
3824 #endif
3825 	if (rdata->cfile)
3826 		cifsFileInfo_put(rdata->cfile);
3827 
3828 	kfree(rdata);
3829 }
3830 
3831 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3832 
3833 static void
3834 cifs_uncached_readv_complete(struct work_struct *work)
3835 {
3836 	struct cifs_readdata *rdata = container_of(work,
3837 						struct cifs_readdata, work);
3838 
3839 	complete(&rdata->done);
3840 	collect_uncached_read_data(rdata->ctx);
3841 	/* the below call can possibly free the last ref to aio ctx */
3842 	kref_put(&rdata->refcount, cifs_readdata_release);
3843 }
3844 
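/*
 * Resend a whole rdata after a retryable failure on the same server
 * connection: reopen the file handle if it was invalidated, wait until we
 * have enough credits to cover rdata->bytes in a single request, then
 * reissue the async read.  On success the rdata moves onto the caller's
 * pending list; on failure its reference is dropped.
 */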
3845 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3846 			struct list_head *rdata_list,
3847 			struct cifs_aio_ctx *ctx)
3848 {
3849 	unsigned int rsize;
3850 	struct cifs_credits credits;
3851 	int rc;
3852 	struct TCP_Server_Info *server;
3853 
3854 	/* XXX: should we pick a new channel here? */
3855 	server = rdata->server;
3856 
3857 	do {
3858 		if (rdata->cfile->invalidHandle) {
3859 			rc = cifs_reopen_file(rdata->cfile, true);
3860 			if (rc == -EAGAIN)
3861 				continue;
3862 			else if (rc)
3863 				break;
3864 		}
3865 
3866 		/*
3867 		 * Wait for credits to resend this rdata.
3868 		 * Note: we are attempting to resend the whole rdata rather
3869 		 * than in segments.
3870 		 */
3871 		do {
3872 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3873 						&rsize, &credits);
3874 
3875 			if (rc)
3876 				goto fail;
3877 
3878 			if (rsize < rdata->bytes) {
3879 				add_credits_and_wake_if(server, &credits, 0);
3880 				msleep(1000);
3881 			}
3882 		} while (rsize < rdata->bytes);
3883 		rdata->credits = credits;
3884 
3885 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3886 		if (!rc) {
3887 			if (rdata->cfile->invalidHandle)
3888 				rc = -EAGAIN;
3889 			else {
3890 #ifdef CONFIG_CIFS_SMB_DIRECT
3891 				if (rdata->mr) {
3892 					rdata->mr->need_invalidate = true;
3893 					smbd_deregister_mr(rdata->mr);
3894 					rdata->mr = NULL;
3895 				}
3896 #endif
3897 				rc = server->ops->async_readv(rdata);
3898 			}
3899 		}
3900 
3901 		/* If the read was successfully sent, we are done */
3902 		if (!rc) {
3903 			/* Add to aio pending list */
3904 			list_add_tail(&rdata->list, rdata_list);
3905 			return 0;
3906 		}
3907 
3908 		/* Roll back credits and retry if needed */
3909 		add_credits_and_wake_if(server, &rdata->credits, 0);
3910 	} while (rc == -EAGAIN);
3911 
3912 fail:
3913 	kref_put(&rdata->refcount, cifs_readdata_release);
3914 	return rc;
3915 }
3916 
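/*
 * Split an uncached/direct read into rsize-sized chunks, allocating a
 * cifs_readdata and obtaining credits for each one, and submit them as
 * async read requests.  Each rdata issued is added to @rdata_list so that
 * collect_uncached_read_data() can gather the results later.
 */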
3917 static int
3918 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
3919 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3920 		     struct cifs_aio_ctx *ctx)
3921 {
3922 	struct cifs_readdata *rdata;
3923 	unsigned int rsize, nsegs, max_segs = INT_MAX;
3924 	struct cifs_credits credits_on_stack;
3925 	struct cifs_credits *credits = &credits_on_stack;
3926 	size_t cur_len, max_len;
3927 	int rc;
3928 	pid_t pid;
3929 	struct TCP_Server_Info *server;
3930 
3931 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3932 
3933 #ifdef CONFIG_CIFS_SMB_DIRECT
3934 	if (server->smbd_conn)
3935 		max_segs = server->smbd_conn->max_frmr_depth;
3936 #endif
3937 
3938 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3939 		pid = open_file->pid;
3940 	else
3941 		pid = current->tgid;
3942 
3943 	do {
3944 		if (open_file->invalidHandle) {
3945 			rc = cifs_reopen_file(open_file, true);
3946 			if (rc == -EAGAIN)
3947 				continue;
3948 			else if (rc)
3949 				break;
3950 		}
3951 
3952 		if (cifs_sb->ctx->rsize == 0)
3953 			cifs_sb->ctx->rsize =
3954 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3955 							     cifs_sb->ctx);
3956 
3957 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3958 						   &rsize, credits);
3959 		if (rc)
3960 			break;
3961 
3962 		max_len = min_t(size_t, len, rsize);
3963 
3964 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
3965 						 max_segs, &nsegs);
3966 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3967 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
3968 		if (cur_len == 0) {
3969 			rc = -EIO;
3970 			add_credits_and_wake_if(server, credits, 0);
3971 			break;
3972 		}
3973 
3974 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
3975 		if (!rdata) {
3976 			add_credits_and_wake_if(server, credits, 0);
3977 			rc = -ENOMEM;
3978 			break;
3979 		}
3980 
3981 		rdata->server	= server;
3982 		rdata->cfile	= cifsFileInfo_get(open_file);
3983 		rdata->offset	= fpos;
3984 		rdata->bytes	= cur_len;
3985 		rdata->pid	= pid;
3986 		rdata->credits	= credits_on_stack;
3987 		rdata->ctx	= ctx;
3988 		kref_get(&ctx->refcount);
3989 
3990 		rdata->iter	= ctx->iter;
3991 		iov_iter_truncate(&rdata->iter, cur_len);
3992 
3993 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3994 
3995 		if (!rc) {
3996 			if (rdata->cfile->invalidHandle)
3997 				rc = -EAGAIN;
3998 			else
3999 				rc = server->ops->async_readv(rdata);
4000 		}
4001 
4002 		if (rc) {
4003 			add_credits_and_wake_if(server, &rdata->credits, 0);
4004 			kref_put(&rdata->refcount, cifs_readdata_release);
4005 			if (rc == -EAGAIN)
4006 				continue;
4007 			break;
4008 		}
4009 
4010 		list_add_tail(&rdata->list, rdata_list);
4011 		iov_iter_advance(&ctx->iter, cur_len);
4012 		fpos += cur_len;
4013 		len -= cur_len;
4014 	} while (len > 0);
4015 
4016 	return rc;
4017 }
4018 
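/*
 * Gather the results of the async reads attached to @ctx.  This runs from
 * the read completion work item; it walks ctx->list in order of increasing
 * offset, resends any -EAGAIN chunks, accumulates the bytes received, and
 * finally completes the iocb (or the synchronous waiter) with the overall
 * result.
 */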
4019 static void
4020 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4021 {
4022 	struct cifs_readdata *rdata, *tmp;
4023 	struct cifs_sb_info *cifs_sb;
4024 	int rc;
4025 
4026 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4027 
4028 	mutex_lock(&ctx->aio_mutex);
4029 
4030 	if (list_empty(&ctx->list)) {
4031 		mutex_unlock(&ctx->aio_mutex);
4032 		return;
4033 	}
4034 
4035 	rc = ctx->rc;
4036 	/* the loop below should proceed in the order of increasing offsets */
4037 again:
4038 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4039 		if (!rc) {
4040 			if (!try_wait_for_completion(&rdata->done)) {
4041 				mutex_unlock(&ctx->aio_mutex);
4042 				return;
4043 			}
4044 
4045 			if (rdata->result == -EAGAIN) {
4046 				/* resend call if it's a retryable error */
4047 				struct list_head tmp_list;
4048 				unsigned int got_bytes = rdata->got_bytes;
4049 
4050 				list_del_init(&rdata->list);
4051 				INIT_LIST_HEAD(&tmp_list);
4052 
4053 				if (ctx->direct_io) {
4054 					/*
4055 					 * Re-use the rdata as this is
4056 					 * direct I/O
4057 					 */
4058 					rc = cifs_resend_rdata(
4059 						rdata,
4060 						&tmp_list, ctx);
4061 				} else {
4062 					rc = cifs_send_async_read(
4063 						rdata->offset + got_bytes,
4064 						rdata->bytes - got_bytes,
4065 						rdata->cfile, cifs_sb,
4066 						&tmp_list, ctx);
4067 
4068 					kref_put(&rdata->refcount,
4069 						cifs_readdata_release);
4070 				}
4071 
4072 				list_splice(&tmp_list, &ctx->list);
4073 
4074 				goto again;
4075 			} else if (rdata->result)
4076 				rc = rdata->result;
4077 
4078 			/* if there was a short read -- discard anything left */
4079 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4080 				rc = -ENODATA;
4081 
4082 			ctx->total_len += rdata->got_bytes;
4083 		}
4084 		list_del_init(&rdata->list);
4085 		kref_put(&rdata->refcount, cifs_readdata_release);
4086 	}
4087 
4088 	/* mask nodata case */
4089 	if (rc == -ENODATA)
4090 		rc = 0;
4091 
4092 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4093 
4094 	mutex_unlock(&ctx->aio_mutex);
4095 
4096 	if (ctx->iocb && ctx->iocb->ki_complete)
4097 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4098 	else
4099 		complete(&ctx->done);
4100 }
4101 
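/*
 * Common helper for the uncached and direct read paths.  Like the write
 * side, it captures the destination iterator in a cifs_aio_ctx, flushes any
 * dirty pagecache over the range for direct I/O, and issues the async reads
 * via cifs_send_async_read().  Async iocbs return -EIOCBQUEUED; synchronous
 * callers wait for collect_uncached_read_data() to complete the context.
 */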
4102 static ssize_t __cifs_readv(
4103 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4104 {
4105 	size_t len;
4106 	struct file *file = iocb->ki_filp;
4107 	struct cifs_sb_info *cifs_sb;
4108 	struct cifsFileInfo *cfile;
4109 	struct cifs_tcon *tcon;
4110 	ssize_t rc, total_read = 0;
4111 	loff_t offset = iocb->ki_pos;
4112 	struct cifs_aio_ctx *ctx;
4113 
4114 	len = iov_iter_count(to);
4115 	if (!len)
4116 		return 0;
4117 
4118 	cifs_sb = CIFS_FILE_SB(file);
4119 	cfile = file->private_data;
4120 	tcon = tlink_tcon(cfile->tlink);
4121 
4122 	if (!tcon->ses->server->ops->async_readv)
4123 		return -ENOSYS;
4124 
4125 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4126 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4127 
4128 	ctx = cifs_aio_ctx_alloc();
4129 	if (!ctx)
4130 		return -ENOMEM;
4131 
4132 	ctx->pos	= offset;
4133 	ctx->direct_io	= direct;
4134 	ctx->len	= len;
4135 	ctx->cfile	= cifsFileInfo_get(cfile);
4136 	ctx->nr_pinned_pages = 0;
4137 
4138 	if (!is_sync_kiocb(iocb))
4139 		ctx->iocb = iocb;
4140 
4141 	if (user_backed_iter(to)) {
4142 		/*
4143 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4144 		 * they contain references to the calling process's virtual
4145 		 * memory layout which won't be available in an async worker
4146 		 * thread.  This also takes a pin on every folio involved.
4147 		 */
4148 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4149 					     &ctx->iter, 0);
4150 		if (rc < 0) {
4151 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4152 			return rc;
4153 		}
4154 
4155 		ctx->nr_pinned_pages = rc;
4156 		ctx->bv = (void *)ctx->iter.bvec;
4157 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4158 		ctx->should_dirty = true;
4159 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4160 		   !is_sync_kiocb(iocb)) {
4161 		/*
4162 		 * If the op is asynchronous, we need to copy the list attached
4163 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4164 		 * will be retained by the caller; in any case, we may or may
4165 		 * not be able to pin the pages, so we don't try.
4166 		 */
4167 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4168 		if (!ctx->bv) {
4169 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4170 			return -ENOMEM;
4171 		}
4172 	} else {
4173 		/*
4174 		 * Otherwise, we just pass the iterator down as-is and rely on
4175 		 * the caller to make sure the pages referred to by the
4176 		 * iterator don't evaporate.
4177 		 */
4178 		ctx->iter = *to;
4179 	}
4180 
4181 	if (direct) {
4182 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4183 						  offset, offset + len - 1);
4184 		if (rc) {
4185 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4186 			return -EAGAIN;
4187 		}
4188 	}
4189 
4190 	/* grab a lock here because the read response handlers can access ctx */
4191 	mutex_lock(&ctx->aio_mutex);
4192 
4193 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4194 
4195 	/* if at least one read request was successfully sent, then reset rc */
4196 	if (!list_empty(&ctx->list))
4197 		rc = 0;
4198 
4199 	mutex_unlock(&ctx->aio_mutex);
4200 
4201 	if (rc) {
4202 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4203 		return rc;
4204 	}
4205 
4206 	if (!is_sync_kiocb(iocb)) {
4207 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4208 		return -EIOCBQUEUED;
4209 	}
4210 
4211 	rc = wait_for_completion_killable(&ctx->done);
4212 	if (rc) {
4213 		mutex_lock(&ctx->aio_mutex);
4214 		ctx->rc = rc = -EINTR;
4215 		total_read = ctx->total_len;
4216 		mutex_unlock(&ctx->aio_mutex);
4217 	} else {
4218 		rc = ctx->rc;
4219 		total_read = ctx->total_len;
4220 	}
4221 
4222 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4223 
4224 	if (total_read) {
4225 		iocb->ki_pos += total_read;
4226 		return total_read;
4227 	}
4228 	return rc;
4229 }
4230 
4231 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4232 {
4233 	return __cifs_readv(iocb, to, true);
4234 }
4235 
4236 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4237 {
4238 	return __cifs_readv(iocb, to, false);
4239 }
4240 
4241 ssize_t
4242 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4243 {
4244 	struct inode *inode = file_inode(iocb->ki_filp);
4245 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4246 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4247 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4248 						iocb->ki_filp->private_data;
4249 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4250 	int rc = -EACCES;
4251 
4252 	/*
4253 	 * In strict cache mode we need to read from the server every time if
4254 	 * we don't have a level II oplock, because the server can delay mtime
4255 	 * changes - so we can't decide whether to invalidate the inode.  We
4256 	 * can also fail a cached page read if there are mandatory locks on
4257 	 * pages affected by this read but not on the region from pos to
4258 	 * pos+len-1.
4259 	 */
4260 	if (!CIFS_CACHE_READ(cinode))
4261 		return cifs_user_readv(iocb, to);
4262 
4263 	if (cap_unix(tcon->ses) &&
4264 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4265 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4266 		return generic_file_read_iter(iocb, to);
4267 
4268 	/*
4269 	 * We need to hold the sem to be sure nobody modifies the lock list
4270 	 * with a brlock that prevents reading.
4271 	 */
4272 	down_read(&cinode->lock_sem);
4273 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4274 				     tcon->ses->server->vals->shared_lock_type,
4275 				     0, NULL, CIFS_READ_OP))
4276 		rc = generic_file_read_iter(iocb, to);
4277 	up_read(&cinode->lock_sem);
4278 	return rc;
4279 }
4280 
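/*
 * Synchronous read helper used by cifs_readpage_worker(): issue
 * rsize-limited SMB reads until the requested amount has been copied into
 * the caller's buffer or an error/EOF is hit.
 */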
4281 static ssize_t
4282 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4283 {
4284 	int rc = -EACCES;
4285 	unsigned int bytes_read = 0;
4286 	unsigned int total_read;
4287 	unsigned int current_read_size;
4288 	unsigned int rsize;
4289 	struct cifs_sb_info *cifs_sb;
4290 	struct cifs_tcon *tcon;
4291 	struct TCP_Server_Info *server;
4292 	unsigned int xid;
4293 	char *cur_offset;
4294 	struct cifsFileInfo *open_file;
4295 	struct cifs_io_parms io_parms = {0};
4296 	int buf_type = CIFS_NO_BUFFER;
4297 	__u32 pid;
4298 
4299 	xid = get_xid();
4300 	cifs_sb = CIFS_FILE_SB(file);
4301 
4302 	/* FIXME: set up handlers for larger reads and/or convert to async */
4303 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4304 
4305 	if (file->private_data == NULL) {
4306 		rc = -EBADF;
4307 		free_xid(xid);
4308 		return rc;
4309 	}
4310 	open_file = file->private_data;
4311 	tcon = tlink_tcon(open_file->tlink);
4312 	server = cifs_pick_channel(tcon->ses);
4313 
4314 	if (!server->ops->sync_read) {
4315 		free_xid(xid);
4316 		return -ENOSYS;
4317 	}
4318 
4319 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4320 		pid = open_file->pid;
4321 	else
4322 		pid = current->tgid;
4323 
4324 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4325 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4326 
4327 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4328 	     total_read += bytes_read, cur_offset += bytes_read) {
4329 		do {
4330 			current_read_size = min_t(uint, read_size - total_read,
4331 						  rsize);
4332 			/*
4333 			 * For Windows ME and 9x we do not want to request more
4334 			 * than was negotiated, since the server will then
4335 			 * refuse the read.
4336 			 */
4337 			if (!(tcon->ses->capabilities &
4338 				tcon->ses->server->vals->cap_large_files)) {
4339 				current_read_size = min_t(uint,
4340 					current_read_size, CIFSMaxBufSize);
4341 			}
4342 			if (open_file->invalidHandle) {
4343 				rc = cifs_reopen_file(open_file, true);
4344 				if (rc != 0)
4345 					break;
4346 			}
4347 			io_parms.pid = pid;
4348 			io_parms.tcon = tcon;
4349 			io_parms.offset = *offset;
4350 			io_parms.length = current_read_size;
4351 			io_parms.server = server;
4352 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4353 						    &bytes_read, &cur_offset,
4354 						    &buf_type);
4355 		} while (rc == -EAGAIN);
4356 
4357 		if (rc || (bytes_read == 0)) {
4358 			if (total_read) {
4359 				break;
4360 			} else {
4361 				free_xid(xid);
4362 				return rc;
4363 			}
4364 		} else {
4365 			cifs_stats_bytes_read(tcon, total_read);
4366 			*offset += bytes_read;
4367 		}
4368 	}
4369 	free_xid(xid);
4370 	return total_read;
4371 }
4372 
4373 /*
4374  * If the page is mmap'ed into a process' page tables, then we need to make
4375  * sure that it doesn't change while being written back.
4376  */
4377 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4378 {
4379 	struct folio *folio = page_folio(vmf->page);
4380 
4381 	/* Wait for the folio to be written to the cache before we allow it to
4382 	 * be modified.  We then assume the entire folio will need writing back.
4383 	 */
4384 #ifdef CONFIG_CIFS_FSCACHE
4385 	if (folio_test_fscache(folio) &&
4386 	    folio_wait_fscache_killable(folio) < 0)
4387 		return VM_FAULT_RETRY;
4388 #endif
4389 
4390 	folio_wait_writeback(folio);
4391 
4392 	if (folio_lock_killable(folio) < 0)
4393 		return VM_FAULT_RETRY;
4394 	return VM_FAULT_LOCKED;
4395 }
4396 
4397 static const struct vm_operations_struct cifs_file_vm_ops = {
4398 	.fault = filemap_fault,
4399 	.map_pages = filemap_map_pages,
4400 	.page_mkwrite = cifs_page_mkwrite,
4401 };
4402 
4403 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4404 {
4405 	int xid, rc = 0;
4406 	struct inode *inode = file_inode(file);
4407 
4408 	xid = get_xid();
4409 
4410 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4411 		rc = cifs_zap_mapping(inode);
4412 	if (!rc)
4413 		rc = generic_file_mmap(file, vma);
4414 	if (!rc)
4415 		vma->vm_ops = &cifs_file_vm_ops;
4416 
4417 	free_xid(xid);
4418 	return rc;
4419 }
4420 
4421 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4422 {
4423 	int rc, xid;
4424 
4425 	xid = get_xid();
4426 
4427 	rc = cifs_revalidate_file(file);
4428 	if (rc)
4429 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4430 			 rc);
4431 	if (!rc)
4432 		rc = generic_file_mmap(file, vma);
4433 	if (!rc)
4434 		vma->vm_ops = &cifs_file_vm_ops;
4435 
4436 	free_xid(xid);
4437 	return rc;
4438 }
4439 
4440 /*
4441  * Unlock a bunch of folios in the pagecache.
4442  */
4443 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4444 {
4445 	struct folio *folio;
4446 	XA_STATE(xas, &mapping->i_pages, first);
4447 
4448 	rcu_read_lock();
4449 	xas_for_each(&xas, folio, last) {
4450 		folio_unlock(folio);
4451 	}
4452 	rcu_read_unlock();
4453 }
4454 
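/*
 * Work item run when an async readahead read finishes: optionally copy the
 * data into fscache, zero any part of the iterator the server didn't fill,
 * then mark the folios uptodate (on success) and unlock them.
 */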
4455 static void cifs_readahead_complete(struct work_struct *work)
4456 {
4457 	struct cifs_readdata *rdata = container_of(work,
4458 						   struct cifs_readdata, work);
4459 	struct folio *folio;
4460 	pgoff_t last;
4461 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4462 
4463 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4464 
4465 	if (good)
4466 		cifs_readahead_to_fscache(rdata->mapping->host,
4467 					  rdata->offset, rdata->bytes);
4468 
4469 	if (iov_iter_count(&rdata->iter) > 0)
4470 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4471 
4472 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4473 
4474 	rcu_read_lock();
4475 	xas_for_each(&xas, folio, last) {
4476 		if (good) {
4477 			flush_dcache_folio(folio);
4478 			folio_mark_uptodate(folio);
4479 		}
4480 		folio_unlock(folio);
4481 	}
4482 	rcu_read_unlock();
4483 
4484 	kref_put(&rdata->refcount, cifs_readdata_release);
4485 }
4486 
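/*
 * ->readahead handler: carve the readahead window into rsize-sized async
 * read requests, letting fscache satisfy any ranges it already holds.
 */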
4487 static void cifs_readahead(struct readahead_control *ractl)
4488 {
4489 	struct cifsFileInfo *open_file = ractl->file->private_data;
4490 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4491 	struct TCP_Server_Info *server;
4492 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4493 	unsigned int ra_pages;
4494 	pgoff_t next_cached = ULONG_MAX, ra_index;
4495 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4496 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4497 	bool check_cache = caching;
4498 	pid_t pid;
4499 	int rc = 0;
4500 
4501 	/* Note that readahead_count() lags behind our dequeuing of pages from
4502 	 * the ractl, so we have to keep track for ourselves.
4503 	 */
4504 	ra_pages = readahead_count(ractl);
4505 	ra_index = readahead_index(ractl);
4506 
4507 	xid = get_xid();
4508 
4509 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4510 		pid = open_file->pid;
4511 	else
4512 		pid = current->tgid;
4513 
4514 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4515 
4516 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4517 		 __func__, ractl->file, ractl->mapping, ra_pages);
4518 
4519 	/*
4520 	 * Chop the readahead request up into rsize-sized read requests.
4521 	 */
4522 	while ((nr_pages = ra_pages)) {
4523 		unsigned int i, rsize;
4524 		struct cifs_readdata *rdata;
4525 		struct cifs_credits credits_on_stack;
4526 		struct cifs_credits *credits = &credits_on_stack;
4527 		struct folio *folio;
4528 		pgoff_t fsize;
4529 
4530 		/*
4531 		 * Find out if we have anything cached in the range of
4532 		 * interest, and if so, where the next chunk of cached data is.
4533 		 */
4534 		if (caching) {
4535 			if (check_cache) {
4536 				rc = cifs_fscache_query_occupancy(
4537 					ractl->mapping->host, ra_index, nr_pages,
4538 					&next_cached, &cache_nr_pages);
4539 				if (rc < 0)
4540 					caching = false;
4541 				check_cache = false;
4542 			}
4543 
4544 			if (ra_index == next_cached) {
4545 				/*
4546 				 * TODO: Send a whole batch of pages to be read
4547 				 * by the cache.
4548 				 */
4549 				folio = readahead_folio(ractl);
4550 				fsize = folio_nr_pages(folio);
4551 				ra_pages -= fsize;
4552 				ra_index += fsize;
4553 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4554 							       &folio->page) < 0) {
4555 					/*
4556 					 * TODO: Deal with cache read failure
4557 					 * here, but for the moment, delegate
4558 					 * that to readpage.
4559 					 */
4560 					caching = false;
4561 				}
4562 				folio_unlock(folio);
4563 				next_cached += fsize;
4564 				cache_nr_pages -= fsize;
4565 				if (cache_nr_pages == 0)
4566 					check_cache = true;
4567 				continue;
4568 			}
4569 		}
4570 
4571 		if (open_file->invalidHandle) {
4572 			rc = cifs_reopen_file(open_file, true);
4573 			if (rc) {
4574 				if (rc == -EAGAIN)
4575 					continue;
4576 				break;
4577 			}
4578 		}
4579 
4580 		if (cifs_sb->ctx->rsize == 0)
4581 			cifs_sb->ctx->rsize =
4582 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4583 							     cifs_sb->ctx);
4584 
4585 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4586 						   &rsize, credits);
4587 		if (rc)
4588 			break;
4589 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4590 		if (next_cached != ULONG_MAX)
4591 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4592 
4593 		/*
4594 		 * Give up immediately if rsize is too small to read an entire
4595 		 * page.  The VFS will fall back to readpage.  We should never
4596 		 * reach this point, however, since we set ra_pages to 0 when the
4597 		 * rsize is smaller than a cache page.
4598 		 */
4599 		if (unlikely(!nr_pages)) {
4600 			add_credits_and_wake_if(server, credits, 0);
4601 			break;
4602 		}
4603 
4604 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4605 		if (!rdata) {
4606 			/* best to give up if we're out of mem */
4607 			add_credits_and_wake_if(server, credits, 0);
4608 			break;
4609 		}
4610 
4611 		rdata->offset	= ra_index * PAGE_SIZE;
4612 		rdata->bytes	= nr_pages * PAGE_SIZE;
4613 		rdata->cfile	= cifsFileInfo_get(open_file);
4614 		rdata->server	= server;
4615 		rdata->mapping	= ractl->mapping;
4616 		rdata->pid	= pid;
4617 		rdata->credits	= credits_on_stack;
4618 
4619 		for (i = 0; i < nr_pages; i++) {
4620 			if (!readahead_folio(ractl))
4621 				WARN_ON(1);
4622 		}
4623 		ra_pages -= nr_pages;
4624 		ra_index += nr_pages;
4625 
4626 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4627 				rdata->offset, rdata->bytes);
4628 
4629 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4630 		if (!rc) {
4631 			if (rdata->cfile->invalidHandle)
4632 				rc = -EAGAIN;
4633 			else
4634 				rc = server->ops->async_readv(rdata);
4635 		}
4636 
4637 		if (rc) {
4638 			add_credits_and_wake_if(server, &rdata->credits, 0);
4639 			cifs_unlock_folios(rdata->mapping,
4640 					   rdata->offset / PAGE_SIZE,
4641 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4642 			/* Fallback to the readpage in error/reconnect cases */
4643 			kref_put(&rdata->refcount, cifs_readdata_release);
4644 			break;
4645 		}
4646 
4647 		kref_put(&rdata->refcount, cifs_readdata_release);
4648 	}
4649 
4650 	free_xid(xid);
4651 }
4652 
4653 /*
4654  * cifs_readpage_worker must be called with the page pinned
4655  */
4656 static int cifs_readpage_worker(struct file *file, struct page *page,
4657 	loff_t *poffset)
4658 {
4659 	struct inode *inode = file_inode(file);
4660 	struct timespec64 atime, mtime;
4661 	char *read_data;
4662 	int rc;
4663 
4664 	/* Is the page cached? */
4665 	rc = cifs_readpage_from_fscache(inode, page);
4666 	if (rc == 0)
4667 		goto read_complete;
4668 
4669 	read_data = kmap(page);
4670 	/* for reads over a certain size we could initiate async read-ahead */
4671 
4672 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4673 
4674 	if (rc < 0)
4675 		goto io_error;
4676 	else
4677 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4678 
4679 	/* we do not want atime to be less than mtime; it broke some apps */
4680 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4681 	mtime = inode_get_mtime(inode);
4682 	if (timespec64_compare(&atime, &mtime) < 0)
4683 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4684 
4685 	if (PAGE_SIZE > rc)
4686 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4687 
4688 	flush_dcache_page(page);
4689 	SetPageUptodate(page);
4690 	rc = 0;
4691 
4692 io_error:
4693 	kunmap(page);
4694 
4695 read_complete:
4696 	unlock_page(page);
4697 	return rc;
4698 }
4699 
4700 static int cifs_read_folio(struct file *file, struct folio *folio)
4701 {
4702 	struct page *page = &folio->page;
4703 	loff_t offset = page_file_offset(page);
4704 	int rc = -EACCES;
4705 	unsigned int xid;
4706 
4707 	xid = get_xid();
4708 
4709 	if (file->private_data == NULL) {
4710 		rc = -EBADF;
4711 		free_xid(xid);
4712 		return rc;
4713 	}
4714 
4715 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4716 		 page, (int)offset, (int)offset);
4717 
4718 	rc = cifs_readpage_worker(file, page, &offset);
4719 
4720 	free_xid(xid);
4721 	return rc;
4722 }
4723 
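/* Return 1 if any open file on this inode was opened with write access. */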
4724 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4725 {
4726 	struct cifsFileInfo *open_file;
4727 
4728 	spin_lock(&cifs_inode->open_file_lock);
4729 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4730 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4731 			spin_unlock(&cifs_inode->open_file_lock);
4732 			return 1;
4733 		}
4734 	}
4735 	spin_unlock(&cifs_inode->open_file_lock);
4736 	return 0;
4737 }
4738 
4739 /* We do not want to update the file size from the server for inodes
4740    open for write, to avoid races with writepage extending the file.
4741    In the future we could consider allowing the inode to be refreshed
4742    only on increases in the file size, but this is tricky to do without
4743    racing with writebehind page caching in the current Linux kernel
4744    design. */
4745 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4746 {
4747 	if (!cifsInode)
4748 		return true;
4749 
4750 	if (is_inode_writable(cifsInode)) {
4751 		/* This inode is open for write at least once */
4752 		struct cifs_sb_info *cifs_sb;
4753 
4754 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4755 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4756 			/* since there is no page cache to corrupt with
4757 			   direct I/O, we can change the size safely */
4758 			return true;
4759 		}
4760 
4761 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4762 			return true;
4763 
4764 		return false;
4765 	} else
4766 		return true;
4767 }
4768 
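/*
 * ->write_begin handler: pin the pagecache page that the buffered write
 * will copy into, reading it in (or zeroing the untouched parts) first when
 * the copy won't cover the whole page.
 */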
4769 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4770 			loff_t pos, unsigned len,
4771 			struct page **pagep, void **fsdata)
4772 {
4773 	int oncethru = 0;
4774 	pgoff_t index = pos >> PAGE_SHIFT;
4775 	loff_t offset = pos & (PAGE_SIZE - 1);
4776 	loff_t page_start = pos & PAGE_MASK;
4777 	loff_t i_size;
4778 	struct page *page;
4779 	int rc = 0;
4780 
4781 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4782 
4783 start:
4784 	page = grab_cache_page_write_begin(mapping, index);
4785 	if (!page) {
4786 		rc = -ENOMEM;
4787 		goto out;
4788 	}
4789 
4790 	if (PageUptodate(page))
4791 		goto out;
4792 
4793 	/*
4794 	 * If we write a full page it will be up to date, no need to read from
4795 	 * the server. If the write is short, we'll end up doing a sync write
4796 	 * instead.
4797 	 */
4798 	if (len == PAGE_SIZE)
4799 		goto out;
4800 
4801 	/*
4802 	 * optimize away the read when we have an oplock, and we're not
4803 	 * expecting to use any of the data we'd be reading in. That
4804 	 * is, when the page lies beyond the EOF, or straddles the EOF
4805 	 * and the write will cover all of the existing data.
4806 	 */
4807 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4808 		i_size = i_size_read(mapping->host);
4809 		if (page_start >= i_size ||
4810 		    (offset == 0 && (pos + len) >= i_size)) {
4811 			zero_user_segments(page, 0, offset,
4812 					   offset + len,
4813 					   PAGE_SIZE);
4814 			/*
4815 			 * PageChecked means that the parts of the page
4816 			 * to which we're not writing are considered up
4817 			 * to date. Once the data is copied to the
4818 			 * page, it can be set uptodate.
4819 			 */
4820 			SetPageChecked(page);
4821 			goto out;
4822 		}
4823 	}
4824 
4825 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4826 		/*
4827 		 * might as well read a page, it is fast enough. If we get
4828 		 * an error, we don't need to return it. cifs_write_end will
4829 		 * do a sync write instead since PG_uptodate isn't set.
4830 		 */
4831 		cifs_readpage_worker(file, page, &page_start);
4832 		put_page(page);
4833 		oncethru = 1;
4834 		goto start;
4835 	} else {
4836 		/* we could try using another file handle if there is one -
4837 		   but how would we lock it to prevent a close of that handle
4838 		   racing with this read? In any case this will be written out
4839 		   by write_end, so it is fine */
4840 	}
4841 out:
4842 	*pagep = page;
4843 	return rc;
4844 }
4845 
4846 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4847 {
4848 	if (folio_test_private(folio))
4849 		return false;
4850 	if (folio_test_fscache(folio)) {
4851 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4852 			return false;
4853 		folio_wait_fscache(folio);
4854 	}
4855 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4856 	return true;
4857 }
4858 
4859 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4860 				 size_t length)
4861 {
4862 	folio_wait_fscache(folio);
4863 }
4864 
4865 static int cifs_launder_folio(struct folio *folio)
4866 {
4867 	int rc = 0;
4868 	loff_t range_start = folio_pos(folio);
4869 	loff_t range_end = range_start + folio_size(folio);
4870 	struct writeback_control wbc = {
4871 		.sync_mode = WB_SYNC_ALL,
4872 		.nr_to_write = 0,
4873 		.range_start = range_start,
4874 		.range_end = range_end,
4875 	};
4876 
4877 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4878 
4879 	if (folio_clear_dirty_for_io(folio))
4880 		rc = cifs_writepage_locked(&folio->page, &wbc);
4881 
4882 	folio_wait_fscache(folio);
4883 	return rc;
4884 }
4885 
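/*
 * Deferred work run when the server breaks our oplock/lease: downgrade the
 * cached state, flush or invalidate the pagecache as needed, push any
 * cached byte-range locks to the server and, unless the break was cancelled
 * or the file is already closed, send the oplock acknowledgment.
 */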
4886 void cifs_oplock_break(struct work_struct *work)
4887 {
4888 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4889 						  oplock_break);
4890 	struct inode *inode = d_inode(cfile->dentry);
4891 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4892 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4893 	struct cifs_tcon *tcon;
4894 	struct TCP_Server_Info *server;
4895 	struct tcon_link *tlink;
4896 	int rc = 0;
4897 	bool purge_cache = false, oplock_break_cancelled;
4898 	__u64 persistent_fid, volatile_fid;
4899 	__u16 net_fid;
4900 
4901 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4902 			TASK_UNINTERRUPTIBLE);
4903 
4904 	tlink = cifs_sb_tlink(cifs_sb);
4905 	if (IS_ERR(tlink))
4906 		goto out;
4907 	tcon = tlink_tcon(tlink);
4908 	server = tcon->ses->server;
4909 
4910 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4911 				      cfile->oplock_epoch, &purge_cache);
4912 
4913 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4914 						cifs_has_mand_locks(cinode)) {
4915 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4916 			 inode);
4917 		cinode->oplock = 0;
4918 	}
4919 
4920 	if (inode && S_ISREG(inode->i_mode)) {
4921 		if (CIFS_CACHE_READ(cinode))
4922 			break_lease(inode, O_RDONLY);
4923 		else
4924 			break_lease(inode, O_WRONLY);
4925 		rc = filemap_fdatawrite(inode->i_mapping);
4926 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4927 			rc = filemap_fdatawait(inode->i_mapping);
4928 			mapping_set_error(inode->i_mapping, rc);
4929 			cifs_zap_mapping(inode);
4930 		}
4931 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4932 		if (CIFS_CACHE_WRITE(cinode))
4933 			goto oplock_break_ack;
4934 	}
4935 
4936 	rc = cifs_push_locks(cfile);
4937 	if (rc)
4938 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4939 
4940 oplock_break_ack:
4941 	/*
4942 	 * When an oplock break is received and there are no active file
4943 	 * handles, only cached ones, schedule the deferred close immediately
4944 	 * so that a new open will not use the cached handle.
4945 	 */
4946 
4947 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
4948 		cifs_close_deferred_file(cinode);
4949 
4950 	persistent_fid = cfile->fid.persistent_fid;
4951 	volatile_fid = cfile->fid.volatile_fid;
4952 	net_fid = cfile->fid.netfid;
4953 	oplock_break_cancelled = cfile->oplock_break_cancelled;
4954 
4955 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4956 	/*
4957 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
4958 	 * an acknowledgment to be sent when the file has already been closed.
4959 	 */
4960 	spin_lock(&cinode->open_file_lock);
4961 	/* check list empty since can race with kill_sb calling tree disconnect */
4962 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
4963 		spin_unlock(&cinode->open_file_lock);
4964 		rc = server->ops->oplock_response(tcon, persistent_fid,
4965 						  volatile_fid, net_fid, cinode);
4966 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4967 	} else
4968 		spin_unlock(&cinode->open_file_lock);
4969 
4970 	cifs_put_tlink(tlink);
4971 out:
4972 	cifs_done_oplock_break(cinode);
4973 }
4974 
4975 /*
4976  * The presence of cifs_direct_io() in the address space ops vector
4977  * allows open() with the O_DIRECT flag, which would have failed otherwise.
4978  *
4979  * In the non-cached mode (mount with cache=none), we shunt off direct read
4980  * and write requests, so this method should never be called.
4981  *
4982  * Direct IO is not yet supported in the cached mode.
4983  */
4984 static ssize_t
4985 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4986 {
4987 	/*
4988 	 * FIXME
4989 	 * Eventually we need to support direct I/O for non-forcedirectio mounts
4990 	 */
4991 	return -EINVAL;
4992 }
4993 
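/*
 * Experimental swap-over-SMB3 support: sanity check that the swap file has
 * no holes, flag the open file as a swapfile and register a single swap
 * extent covering it.
 */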
4994 static int cifs_swap_activate(struct swap_info_struct *sis,
4995 			      struct file *swap_file, sector_t *span)
4996 {
4997 	struct cifsFileInfo *cfile = swap_file->private_data;
4998 	struct inode *inode = swap_file->f_mapping->host;
4999 	unsigned long blocks;
5000 	long long isize;
5001 
5002 	cifs_dbg(FYI, "swap activate\n");
5003 
5004 	if (!swap_file->f_mapping->a_ops->swap_rw)
5005 		/* Cannot support swap */
5006 		return -EINVAL;
5007 
5008 	spin_lock(&inode->i_lock);
5009 	blocks = inode->i_blocks;
5010 	isize = inode->i_size;
5011 	spin_unlock(&inode->i_lock);
5012 	if (blocks * 512 < isize) {
5013 		pr_warn("swap activate: swapfile has holes\n");
5014 		return -EINVAL;
5015 	}
5016 	*span = sis->pages;
5017 
5018 	pr_warn_once("Swap support over SMB3 is experimental\n");
5019 
5020 	/*
5021 	 * TODO: consider adding ACL (or documenting how) to prevent other
5022 	 * users (on this or other systems) from reading it
5023 	 */
5024 
5025 
5026 	/* TODO: add sk_set_memalloc(inet) or similar */
5027 
5028 	if (cfile)
5029 		cfile->swapfile = true;
5030 	/*
5031 	 * TODO: Since file already open, we can't open with DENY_ALL here
5032 	 * but we could add call to grab a byte range lock to prevent others
5033 	 * from reading or writing the file
5034 	 */
5035 
5036 	sis->flags |= SWP_FS_OPS;
5037 	return add_swap_extent(sis, 0, sis->max, 0);
5038 }
5039 
5040 static void cifs_swap_deactivate(struct file *file)
5041 {
5042 	struct cifsFileInfo *cfile = file->private_data;
5043 
5044 	cifs_dbg(FYI, "swap deactivate\n");
5045 
5046 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5047 
5048 	if (cfile)
5049 		cfile->swapfile = false;
5050 
5051 	/* do we need to unpin (or unlock) the file? */
5052 }
5053 
5054 const struct address_space_operations cifs_addr_ops = {
5055 	.read_folio = cifs_read_folio,
5056 	.readahead = cifs_readahead,
5057 	.writepages = cifs_writepages,
5058 	.write_begin = cifs_write_begin,
5059 	.write_end = cifs_write_end,
5060 	.dirty_folio = netfs_dirty_folio,
5061 	.release_folio = cifs_release_folio,
5062 	.direct_IO = cifs_direct_io,
5063 	.invalidate_folio = cifs_invalidate_folio,
5064 	.launder_folio = cifs_launder_folio,
5065 	.migrate_folio = filemap_migrate_folio,
5066 	/*
5067 	 * TODO: investigate and if useful we could add an is_dirty_writeback
5068 	 * helper if needed
5069 	 */
5070 	.swap_activate = cifs_swap_activate,
5071 	.swap_deactivate = cifs_swap_deactivate,
5072 };
5073 
5074 /*
5075  * cifs_readahead requires the server to support a buffer large enough to
5076  * contain the header plus one complete page of data.  Otherwise, we need
5077  * to leave cifs_readahead out of the address space operations.
5078  */
5079 const struct address_space_operations cifs_addr_ops_smallbuf = {
5080 	.read_folio = cifs_read_folio,
5081 	.writepages = cifs_writepages,
5082 	.write_begin = cifs_write_begin,
5083 	.write_end = cifs_write_end,
5084 	.dirty_folio = netfs_dirty_folio,
5085 	.release_folio = cifs_release_folio,
5086 	.invalidate_folio = cifs_invalidate_folio,
5087 	.launder_folio = cifs_launder_folio,
5088 	.migrate_folio = filemap_migrate_folio,
5089 };
5090