xref: /linux/fs/smb/client/file.c (revision 1f20a5769446a1acae67ac9e63d07a594829a789)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
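/*
 * Note on the pattern above: the xarray walk runs under rcu_read_lock(), but
 * folio_lock() can sleep, so the iteration pauses the xa_state and drops the
 * RCU read lock around each folio before resuming the walk.
 */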
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark all open files on the tree connection as invalid, since they
167  * were closed when the session to the server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->need_reconnect)
179 		tcon->status = TID_NEED_RECON;
180 
181 	if (tcon->status != TID_NEED_RECON) {
182 		spin_unlock(&tcon->tc_lock);
183 		return;
184 	}
185 	tcon->status = TID_IN_FILES_INVALIDATE;
186 	spin_unlock(&tcon->tc_lock);
187 
188 	/* list all files open on tree connection and mark them invalid */
189 	spin_lock(&tcon->open_file_lock);
190 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 		open_file->invalidHandle = true;
193 		open_file->oplock_break_cancelled = true;
194 	}
195 	spin_unlock(&tcon->open_file_lock);
196 
197 	invalidate_all_cached_dirs(tcon);
198 	spin_lock(&tcon->tc_lock);
199 	if (tcon->status == TID_IN_FILES_INVALIDATE)
200 		tcon->status = TID_NEED_TCON;
201 	spin_unlock(&tcon->tc_lock);
202 
203 	/*
204 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 	 * to this tcon.
206 	 */
207 }
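/*
 * The tcon status handling above is effectively a small state machine:
 * TID_NEED_RECON -> TID_IN_FILES_INVALIDATE while the open handles are being
 * marked invalid, then TID_NEED_TCON so that the tree connection itself gets
 * re-established on the next reconnect pass.
 */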
208 
209 static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
210 {
211 	if ((flags & O_ACCMODE) == O_RDONLY)
212 		return GENERIC_READ;
213 	else if ((flags & O_ACCMODE) == O_WRONLY)
214 		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
215 	else if ((flags & O_ACCMODE) == O_RDWR) {
216 		/* GENERIC_ALL is too much permission to request; it can
217 		   cause unnecessary access-denied errors on create */
218 		/* return GENERIC_ALL; */
219 		return (GENERIC_READ | GENERIC_WRITE);
220 	}
221 
222 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 		FILE_READ_DATA);
225 }
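/*
 * Example of the mapping above: an open with O_WRONLY normally requests
 * GENERIC_WRITE only, but when rdwr_for_fscache is set (fscache active on
 * the inode) it requests GENERIC_READ | GENERIC_WRITE so the local cache
 * can be filled in around partial writes.
 */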
226 
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230 	u32 posix_flags = 0;
231 
232 	if ((flags & O_ACCMODE) == O_RDONLY)
233 		posix_flags = SMB_O_RDONLY;
234 	else if ((flags & O_ACCMODE) == O_WRONLY)
235 		posix_flags = SMB_O_WRONLY;
236 	else if ((flags & O_ACCMODE) == O_RDWR)
237 		posix_flags = SMB_O_RDWR;
238 
239 	if (flags & O_CREAT) {
240 		posix_flags |= SMB_O_CREAT;
241 		if (flags & O_EXCL)
242 			posix_flags |= SMB_O_EXCL;
243 	} else if (flags & O_EXCL)
244 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 			 current->comm, current->tgid);
246 
247 	if (flags & O_TRUNC)
248 		posix_flags |= SMB_O_TRUNC;
249 	/* be safe and imply O_SYNC for O_DSYNC */
250 	if (flags & O_DSYNC)
251 		posix_flags |= SMB_O_SYNC;
252 	if (flags & O_DIRECTORY)
253 		posix_flags |= SMB_O_DIRECTORY;
254 	if (flags & O_NOFOLLOW)
255 		posix_flags |= SMB_O_NOFOLLOW;
256 	if (flags & O_DIRECT)
257 		posix_flags |= SMB_O_DIRECT;
258 
259 	return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262 
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 		return FILE_CREATE;
267 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 		return FILE_OVERWRITE_IF;
269 	else if ((flags & O_CREAT) == O_CREAT)
270 		return FILE_OPEN_IF;
271 	else if ((flags & O_TRUNC) == O_TRUNC)
272 		return FILE_OVERWRITE;
273 	else
274 		return FILE_OPEN;
275 }
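/*
 * For example, an open(2) with O_CREAT | O_TRUNC maps to FILE_OVERWRITE_IF,
 * while a plain O_RDWR open with none of these flags maps to FILE_OPEN.
 */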
276 
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279 			struct super_block *sb, int mode, unsigned int f_flags,
280 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282 	int rc;
283 	FILE_UNIX_BASIC_INFO *presp_data;
284 	__u32 posix_flags = 0;
285 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 	struct cifs_fattr fattr;
287 	struct tcon_link *tlink;
288 	struct cifs_tcon *tcon;
289 
290 	cifs_dbg(FYI, "posix open %s\n", full_path);
291 
292 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 	if (presp_data == NULL)
294 		return -ENOMEM;
295 
296 	tlink = cifs_sb_tlink(cifs_sb);
297 	if (IS_ERR(tlink)) {
298 		rc = PTR_ERR(tlink);
299 		goto posix_open_ret;
300 	}
301 
302 	tcon = tlink_tcon(tlink);
303 	mode &= ~current_umask();
304 
305 	posix_flags = cifs_posix_convert_flags(f_flags);
306 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 			     poplock, full_path, cifs_sb->local_nls,
308 			     cifs_remap(cifs_sb));
309 	cifs_put_tlink(tlink);
310 
311 	if (rc)
312 		goto posix_open_ret;
313 
314 	if (presp_data->Type == cpu_to_le32(-1))
315 		goto posix_open_ret; /* open ok, caller does qpathinfo */
316 
317 	if (!pinode)
318 		goto posix_open_ret; /* caller does not need info */
319 
320 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321 
322 	/* get new inode and set it up */
323 	if (*pinode == NULL) {
324 		cifs_fill_uniqueid(sb, &fattr);
325 		*pinode = cifs_iget(sb, &fattr);
326 		if (!*pinode) {
327 			rc = -ENOMEM;
328 			goto posix_open_ret;
329 		}
330 	} else {
331 		cifs_revalidate_mapping(*pinode);
332 		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333 	}
334 
335 posix_open_ret:
336 	kfree(presp_data);
337 	return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340 
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345 	int rc;
346 	int desired_access;
347 	int disposition;
348 	int create_options = CREATE_NOT_DIR;
349 	struct TCP_Server_Info *server = tcon->ses->server;
350 	struct cifs_open_parms oparms;
351 	int rdwr_for_fscache = 0;
352 
353 	if (!server->ops->open)
354 		return -ENOSYS;
355 
356 	/* If we're caching, we need to be able to fill in around partial writes. */
357 	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
358 		rdwr_for_fscache = 1;
359 
360 	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
361 
362 /*********************************************************************
363  *  open flag mapping table:
364  *
365  *	POSIX Flag            CIFS Disposition
366  *	----------            ----------------
367  *	O_CREAT               FILE_OPEN_IF
368  *	O_CREAT | O_EXCL      FILE_CREATE
369  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
370  *	O_TRUNC               FILE_OVERWRITE
371  *	none of the above     FILE_OPEN
372  *
373  *	Note that there is no direct match for the disposition
374  *	FILE_SUPERSEDE (ie create whether or not the file exists);
375  *	O_CREAT | O_TRUNC is similar but truncates the existing
376  *	file rather than creating a new file as FILE_SUPERSEDE does
377  *	(which uses the attributes / metadata passed in on the open call).
378  *
379  *	O_SYNC is a reasonable match to the CIFS writethrough flag
380  *	and the read/write flags match reasonably.  O_LARGEFILE
381  *	is irrelevant because largefile support is always used
382  *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
383  *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
384  *********************************************************************/
385 
386 	disposition = cifs_get_disposition(f_flags);
387 
388 	/* BB pass O_SYNC flag through on file attributes .. BB */
389 
390 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
391 	if (f_flags & O_SYNC)
392 		create_options |= CREATE_WRITE_THROUGH;
393 
394 	if (f_flags & O_DIRECT)
395 		create_options |= CREATE_NO_BUFFER;
396 
397 retry_open:
398 	oparms = (struct cifs_open_parms) {
399 		.tcon = tcon,
400 		.cifs_sb = cifs_sb,
401 		.desired_access = desired_access,
402 		.create_options = cifs_create_options(cifs_sb, create_options),
403 		.disposition = disposition,
404 		.path = full_path,
405 		.fid = fid,
406 	};
407 
408 	rc = server->ops->open(xid, &oparms, oplock, buf);
409 	if (rc) {
410 		if (rc == -EACCES && rdwr_for_fscache == 1) {
411 			desired_access = cifs_convert_flags(f_flags, 0);
412 			rdwr_for_fscache = 2;
413 			goto retry_open;
414 		}
415 		return rc;
416 	}
417 	if (rdwr_for_fscache == 2)
418 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
419 
420 	/* TODO: Add support for calling posix query info, passing in the fid */
421 	if (tcon->unix_ext)
422 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
423 					      xid);
424 	else
425 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
426 					 xid, fid);
427 
428 	if (rc) {
429 		server->ops->close(xid, tcon, fid);
430 		if (rc == -ESTALE)
431 			rc = -EOPENSTALE;
432 	}
433 
434 	return rc;
435 }
436 
437 static bool
438 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
439 {
440 	struct cifs_fid_locks *cur;
441 	bool has_locks = false;
442 
443 	down_read(&cinode->lock_sem);
444 	list_for_each_entry(cur, &cinode->llist, llist) {
445 		if (!list_empty(&cur->locks)) {
446 			has_locks = true;
447 			break;
448 		}
449 	}
450 	up_read(&cinode->lock_sem);
451 	return has_locks;
452 }
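/*
 * cifs_has_mand_locks() is used below (see cifs_new_fileinfo() and
 * cifs_reopen_file()) to decide whether a read oplock granted by the server
 * must be downgraded to None when mandatory byte-range locks are present on
 * the inode.
 */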
453 
454 void
455 cifs_down_write(struct rw_semaphore *sem)
456 {
457 	while (!down_write_trylock(sem))
458 		msleep(10);
459 }
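/*
 * cifs_down_write() acquires @sem for writing by polling
 * down_write_trylock() every 10ms rather than blocking in down_write();
 * the lock_sem write acquisitions in this file go through it.
 */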
460 
461 static void cifsFileInfo_put_work(struct work_struct *work);
462 void serverclose_work(struct work_struct *work);
463 
464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
465 				       struct tcon_link *tlink, __u32 oplock,
466 				       const char *symlink_target)
467 {
468 	struct dentry *dentry = file_dentry(file);
469 	struct inode *inode = d_inode(dentry);
470 	struct cifsInodeInfo *cinode = CIFS_I(inode);
471 	struct cifsFileInfo *cfile;
472 	struct cifs_fid_locks *fdlocks;
473 	struct cifs_tcon *tcon = tlink_tcon(tlink);
474 	struct TCP_Server_Info *server = tcon->ses->server;
475 
476 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
477 	if (cfile == NULL)
478 		return cfile;
479 
480 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
481 	if (!fdlocks) {
482 		kfree(cfile);
483 		return NULL;
484 	}
485 
486 	if (symlink_target) {
487 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
488 		if (!cfile->symlink_target) {
489 			kfree(fdlocks);
490 			kfree(cfile);
491 			return NULL;
492 		}
493 	}
494 
495 	INIT_LIST_HEAD(&fdlocks->locks);
496 	fdlocks->cfile = cfile;
497 	cfile->llist = fdlocks;
498 
499 	cfile->count = 1;
500 	cfile->pid = current->tgid;
501 	cfile->uid = current_fsuid();
502 	cfile->dentry = dget(dentry);
503 	cfile->f_flags = file->f_flags;
504 	cfile->invalidHandle = false;
505 	cfile->deferred_close_scheduled = false;
506 	cfile->tlink = cifs_get_tlink(tlink);
507 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
508 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
509 	INIT_WORK(&cfile->serverclose, serverclose_work);
510 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
511 	mutex_init(&cfile->fh_mutex);
512 	spin_lock_init(&cfile->file_info_lock);
513 
514 	cifs_sb_active(inode->i_sb);
515 
516 	/*
517 	 * If the server returned a read oplock and we have mandatory brlocks,
518 	 * set oplock level to None.
519 	 */
520 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
521 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
522 		oplock = 0;
523 	}
524 
525 	cifs_down_write(&cinode->lock_sem);
526 	list_add(&fdlocks->llist, &cinode->llist);
527 	up_write(&cinode->lock_sem);
528 
529 	spin_lock(&tcon->open_file_lock);
530 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
531 		oplock = fid->pending_open->oplock;
532 	list_del(&fid->pending_open->olist);
533 
534 	fid->purge_cache = false;
535 	server->ops->set_fid(cfile, fid, oplock);
536 
537 	list_add(&cfile->tlist, &tcon->openFileList);
538 	atomic_inc(&tcon->num_local_opens);
539 
540 	/* if readable file instance, put it first in the list */
541 	spin_lock(&cinode->open_file_lock);
542 	if (file->f_mode & FMODE_READ)
543 		list_add(&cfile->flist, &cinode->openFileList);
544 	else
545 		list_add_tail(&cfile->flist, &cinode->openFileList);
546 	spin_unlock(&cinode->open_file_lock);
547 	spin_unlock(&tcon->open_file_lock);
548 
549 	if (fid->purge_cache)
550 		cifs_zap_mapping(inode);
551 
552 	file->private_data = cfile;
553 	return cfile;
554 }
555 
556 struct cifsFileInfo *
557 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
558 {
559 	spin_lock(&cifs_file->file_info_lock);
560 	cifsFileInfo_get_locked(cifs_file);
561 	spin_unlock(&cifs_file->file_info_lock);
562 	return cifs_file;
563 }
564 
565 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
566 {
567 	struct inode *inode = d_inode(cifs_file->dentry);
568 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
569 	struct cifsLockInfo *li, *tmp;
570 	struct super_block *sb = inode->i_sb;
571 
572 	/*
573 	 * Delete any outstanding lock records. We'll lose them when the file
574 	 * is closed anyway.
575 	 */
576 	cifs_down_write(&cifsi->lock_sem);
577 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
578 		list_del(&li->llist);
579 		cifs_del_lock_waiters(li);
580 		kfree(li);
581 	}
582 	list_del(&cifs_file->llist->llist);
583 	kfree(cifs_file->llist);
584 	up_write(&cifsi->lock_sem);
585 
586 	cifs_put_tlink(cifs_file->tlink);
587 	dput(cifs_file->dentry);
588 	cifs_sb_deactive(sb);
589 	kfree(cifs_file->symlink_target);
590 	kfree(cifs_file);
591 }
592 
593 static void cifsFileInfo_put_work(struct work_struct *work)
594 {
595 	struct cifsFileInfo *cifs_file = container_of(work,
596 			struct cifsFileInfo, put);
597 
598 	cifsFileInfo_put_final(cifs_file);
599 }
600 
601 void serverclose_work(struct work_struct *work)
602 {
603 	struct cifsFileInfo *cifs_file = container_of(work,
604 			struct cifsFileInfo, serverclose);
605 
606 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
607 
608 	struct TCP_Server_Info *server = tcon->ses->server;
609 	int rc = 0;
610 	int retries = 0;
611 	int MAX_RETRIES = 4;
612 
613 	do {
614 		if (server->ops->close_getattr)
615 			rc = server->ops->close_getattr(0, tcon, cifs_file);
616 		else if (server->ops->close)
617 			rc = server->ops->close(0, tcon, &cifs_file->fid);
618 
619 		if (rc == -EBUSY || rc == -EAGAIN) {
620 			retries++;
621 			msleep(250);
622 		}
623 	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
624 	);
625 
626 	if (retries == MAX_RETRIES)
627 		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
628 
629 	if (cifs_file->offload)
630 		queue_work(fileinfo_put_wq, &cifs_file->put);
631 	else
632 		cifsFileInfo_put_final(cifs_file);
633 }
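/*
 * serverclose_work() retries the server-side close up to MAX_RETRIES times,
 * sleeping 250ms between attempts while the server returns -EBUSY or
 * -EAGAIN, and then hands the cifsFileInfo over to the final put (either
 * inline or via fileinfo_put_wq, depending on cifs_file->offload).
 */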
634 
635 /**
636  * cifsFileInfo_put - release a reference to file private data
637  *
638  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
639  *
640  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
641  */
642 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
643 {
644 	_cifsFileInfo_put(cifs_file, true, true);
645 }
646 
647 /**
648  * _cifsFileInfo_put - release a reference to file private data
649  *
650  * This may involve closing the filehandle @cifs_file out on the
651  * server. Must be called without holding tcon->open_file_lock,
652  * cinode->open_file_lock and cifs_file->file_info_lock.
653  *
654  * If @wait_oplock_handler is true and we are releasing the last
655  * reference, wait for any running oplock break handler of the file
656  * and cancel any pending one.
657  *
658  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
659  * @wait_oplock_handler: must be false if called from oplock_break_handler
660  * @offload:	if true, queue the final put on a workqueue (not offloaded on close and oplock breaks)
661  *
662  */
663 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
664 		       bool wait_oplock_handler, bool offload)
665 {
666 	struct inode *inode = d_inode(cifs_file->dentry);
667 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
668 	struct TCP_Server_Info *server = tcon->ses->server;
669 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
670 	struct super_block *sb = inode->i_sb;
671 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
672 	struct cifs_fid fid = {};
673 	struct cifs_pending_open open;
674 	bool oplock_break_cancelled;
675 	bool serverclose_offloaded = false;
676 
677 	spin_lock(&tcon->open_file_lock);
678 	spin_lock(&cifsi->open_file_lock);
679 	spin_lock(&cifs_file->file_info_lock);
680 
681 	cifs_file->offload = offload;
682 	if (--cifs_file->count > 0) {
683 		spin_unlock(&cifs_file->file_info_lock);
684 		spin_unlock(&cifsi->open_file_lock);
685 		spin_unlock(&tcon->open_file_lock);
686 		return;
687 	}
688 	spin_unlock(&cifs_file->file_info_lock);
689 
690 	if (server->ops->get_lease_key)
691 		server->ops->get_lease_key(inode, &fid);
692 
693 	/* store open in pending opens to make sure we don't miss lease break */
694 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
695 
696 	/* remove it from the lists */
697 	list_del(&cifs_file->flist);
698 	list_del(&cifs_file->tlist);
699 	atomic_dec(&tcon->num_local_opens);
700 
701 	if (list_empty(&cifsi->openFileList)) {
702 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
703 			 d_inode(cifs_file->dentry));
704 		/*
705 		 * In strict cache mode we need invalidate mapping on the last
706 		 * In strict cache mode we need to invalidate the mapping on the
707 		 * last close because it may cause an error when we open this
708 		 * file again and get at least a level II oplock.
709 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
710 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
711 		cifs_set_oplock_level(cifsi, 0);
712 	}
713 
714 	spin_unlock(&cifsi->open_file_lock);
715 	spin_unlock(&tcon->open_file_lock);
716 
717 	oplock_break_cancelled = wait_oplock_handler ?
718 		cancel_work_sync(&cifs_file->oplock_break) : false;
719 
720 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
721 		struct TCP_Server_Info *server = tcon->ses->server;
722 		unsigned int xid;
723 		int rc = 0;
724 
725 		xid = get_xid();
726 		if (server->ops->close_getattr)
727 			rc = server->ops->close_getattr(xid, tcon, cifs_file);
728 		else if (server->ops->close)
729 			rc = server->ops->close(xid, tcon, &cifs_file->fid);
730 		_free_xid(xid);
731 
732 		if (rc == -EBUSY || rc == -EAGAIN) {
733 			// Server close failed, hence offloading it as an async op
734 			queue_work(serverclose_wq, &cifs_file->serverclose);
735 			serverclose_offloaded = true;
736 		}
737 	}
738 
739 	if (oplock_break_cancelled)
740 		cifs_done_oplock_break(cifsi);
741 
742 	cifs_del_pending_open(&open);
743 
744 	// If the serverclose has been offloaded to the wq (on failure), it
745 	// will handle offloading the put as well. If the serverclose was not
746 	// offloaded, we need to handle offloading the put here.
747 	if (!serverclose_offloaded) {
748 		if (offload)
749 			queue_work(fileinfo_put_wq, &cifs_file->put);
750 		else
751 			cifsFileInfo_put_final(cifs_file);
752 	}
753 }
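/*
 * A minimal sketch of the reference-counting contract above (illustrative
 * only, not a real call site): a caller that needs to use a cfile outside
 * the list locks takes a reference and later drops it, e.g.
 *
 *	cfile = cifsFileInfo_get(cfile);
 *	... use cfile without holding open_file_lock ...
 *	cifsFileInfo_put(cfile);
 *
 * The final put may close the handle on the server, so it must not be
 * called while holding tcon->open_file_lock, cinode->open_file_lock or
 * cfile->file_info_lock.
 */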
754 
755 int cifs_open(struct inode *inode, struct file *file)
756 
757 {
758 	int rc = -EACCES;
759 	unsigned int xid;
760 	__u32 oplock;
761 	struct cifs_sb_info *cifs_sb;
762 	struct TCP_Server_Info *server;
763 	struct cifs_tcon *tcon;
764 	struct tcon_link *tlink;
765 	struct cifsFileInfo *cfile = NULL;
766 	void *page;
767 	const char *full_path;
768 	bool posix_open_ok = false;
769 	struct cifs_fid fid = {};
770 	struct cifs_pending_open open;
771 	struct cifs_open_info_data data = {};
772 
773 	xid = get_xid();
774 
775 	cifs_sb = CIFS_SB(inode->i_sb);
776 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
777 		free_xid(xid);
778 		return -EIO;
779 	}
780 
781 	tlink = cifs_sb_tlink(cifs_sb);
782 	if (IS_ERR(tlink)) {
783 		free_xid(xid);
784 		return PTR_ERR(tlink);
785 	}
786 	tcon = tlink_tcon(tlink);
787 	server = tcon->ses->server;
788 
789 	page = alloc_dentry_path();
790 	full_path = build_path_from_dentry(file_dentry(file), page);
791 	if (IS_ERR(full_path)) {
792 		rc = PTR_ERR(full_path);
793 		goto out;
794 	}
795 
796 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
797 		 inode, file->f_flags, full_path);
798 
799 	if (file->f_flags & O_DIRECT &&
800 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
801 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
802 			file->f_op = &cifs_file_direct_nobrl_ops;
803 		else
804 			file->f_op = &cifs_file_direct_ops;
805 	}
806 
807 	/* Get the cached handle as SMB2 close is deferred */
808 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
809 	if (rc == 0) {
810 		if (file->f_flags == cfile->f_flags) {
811 			file->private_data = cfile;
812 			spin_lock(&CIFS_I(inode)->deferred_lock);
813 			cifs_del_deferred_close(cfile);
814 			spin_unlock(&CIFS_I(inode)->deferred_lock);
815 			goto use_cache;
816 		} else {
817 			_cifsFileInfo_put(cfile, true, false);
818 		}
819 	}
820 
821 	if (server->oplocks)
822 		oplock = REQ_OPLOCK;
823 	else
824 		oplock = 0;
825 
826 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
827 	if (!tcon->broken_posix_open && tcon->unix_ext &&
828 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
829 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
830 		/* can not refresh inode info since size could be stale */
831 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
832 				cifs_sb->ctx->file_mode /* ignored */,
833 				file->f_flags, &oplock, &fid.netfid, xid);
834 		if (rc == 0) {
835 			cifs_dbg(FYI, "posix open succeeded\n");
836 			posix_open_ok = true;
837 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
838 			if (tcon->ses->serverNOS)
839 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
840 					 tcon->ses->ip_addr,
841 					 tcon->ses->serverNOS);
842 			tcon->broken_posix_open = true;
843 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
844 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
845 			goto out;
846 		/*
847 		 * Else fall through to retry the open the old way on network
848 		 * i/o or DFS errors.
849 		 */
850 	}
851 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
852 
853 	if (server->ops->get_lease_key)
854 		server->ops->get_lease_key(inode, &fid);
855 
856 	cifs_add_pending_open(&fid, tlink, &open);
857 
858 	if (!posix_open_ok) {
859 		if (server->ops->get_lease_key)
860 			server->ops->get_lease_key(inode, &fid);
861 
862 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
863 				  xid, &data);
864 		if (rc) {
865 			cifs_del_pending_open(&open);
866 			goto out;
867 		}
868 	}
869 
870 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
871 	if (cfile == NULL) {
872 		if (server->ops->close)
873 			server->ops->close(xid, tcon, &fid);
874 		cifs_del_pending_open(&open);
875 		rc = -ENOMEM;
876 		goto out;
877 	}
878 
879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
880 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
881 		/*
882 		 * Time to set the mode, which we could not set earlier due to
883 		 * problems creating new read-only files.
884 		 */
885 		struct cifs_unix_set_info_args args = {
886 			.mode	= inode->i_mode,
887 			.uid	= INVALID_UID, /* no change */
888 			.gid	= INVALID_GID, /* no change */
889 			.ctime	= NO_CHANGE_64,
890 			.atime	= NO_CHANGE_64,
891 			.mtime	= NO_CHANGE_64,
892 			.device	= 0,
893 		};
894 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
895 				       cfile->pid);
896 	}
897 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
898 
899 use_cache:
900 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
901 			   file->f_mode & FMODE_WRITE);
902 	if (!(file->f_flags & O_DIRECT))
903 		goto out;
904 	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
905 		goto out;
906 	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
907 
908 out:
909 	free_dentry_path(page);
910 	free_xid(xid);
911 	cifs_put_tlink(tlink);
912 	cifs_free_open_info(&data);
913 	return rc;
914 }
915 
916 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
917 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
919 
920 /*
921  * Try to reacquire byte range locks that were released when session
922  * to server was lost.
923  */
924 static int
925 cifs_relock_file(struct cifsFileInfo *cfile)
926 {
927 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
928 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
929 	int rc = 0;
930 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
931 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
932 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
933 
934 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
935 	if (cinode->can_cache_brlcks) {
936 		/* can cache locks - no need to relock */
937 		up_read(&cinode->lock_sem);
938 		return rc;
939 	}
940 
941 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
942 	if (cap_unix(tcon->ses) &&
943 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
944 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
945 		rc = cifs_push_posix_locks(cfile);
946 	else
947 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
948 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
949 
950 	up_read(&cinode->lock_sem);
951 	return rc;
952 }
953 
954 static int
955 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
956 {
957 	int rc = -EACCES;
958 	unsigned int xid;
959 	__u32 oplock;
960 	struct cifs_sb_info *cifs_sb;
961 	struct cifs_tcon *tcon;
962 	struct TCP_Server_Info *server;
963 	struct cifsInodeInfo *cinode;
964 	struct inode *inode;
965 	void *page;
966 	const char *full_path;
967 	int desired_access;
968 	int disposition = FILE_OPEN;
969 	int create_options = CREATE_NOT_DIR;
970 	struct cifs_open_parms oparms;
971 	int rdwr_for_fscache = 0;
972 
973 	xid = get_xid();
974 	mutex_lock(&cfile->fh_mutex);
975 	if (!cfile->invalidHandle) {
976 		mutex_unlock(&cfile->fh_mutex);
977 		free_xid(xid);
978 		return 0;
979 	}
980 
981 	inode = d_inode(cfile->dentry);
982 	cifs_sb = CIFS_SB(inode->i_sb);
983 	tcon = tlink_tcon(cfile->tlink);
984 	server = tcon->ses->server;
985 
986 	/*
987 	 * Cannot grab the rename sem here: various ops, including some that
988 	 * already hold the rename sem, can end up causing writepage to be
989 	 * called, and if the server was down we end up here with no way to
990 	 * tell whether the caller already holds the rename_sem.
991 	 */
992 	page = alloc_dentry_path();
993 	full_path = build_path_from_dentry(cfile->dentry, page);
994 	if (IS_ERR(full_path)) {
995 		mutex_unlock(&cfile->fh_mutex);
996 		free_dentry_path(page);
997 		free_xid(xid);
998 		return PTR_ERR(full_path);
999 	}
1000 
1001 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
1002 		 inode, cfile->f_flags, full_path);
1003 
1004 	if (tcon->ses->server->oplocks)
1005 		oplock = REQ_OPLOCK;
1006 	else
1007 		oplock = 0;
1008 
1009 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1010 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
1011 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
1012 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
1013 		/*
1014 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
1015 		 * original open. Must mask them off for a reopen.
1016 		 */
1017 		unsigned int oflags = cfile->f_flags &
1018 						~(O_CREAT | O_EXCL | O_TRUNC);
1019 
1020 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
1021 				     cifs_sb->ctx->file_mode /* ignored */,
1022 				     oflags, &oplock, &cfile->fid.netfid, xid);
1023 		if (rc == 0) {
1024 			cifs_dbg(FYI, "posix reopen succeeded\n");
1025 			oparms.reconnect = true;
1026 			goto reopen_success;
1027 		}
1028 		/*
1029 		 * Fall through to retry the open the old way on errors; in the
1030 		 * reconnect path especially it is important to retry hard.
1031 		 */
1032 	}
1033 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1034 
1035 	/* If we're caching, we need to be able to fill in around partial writes. */
1036 	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
1037 		rdwr_for_fscache = 1;
1038 
1039 	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
1040 
1041 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
1042 	if (cfile->f_flags & O_SYNC)
1043 		create_options |= CREATE_WRITE_THROUGH;
1044 
1045 	if (cfile->f_flags & O_DIRECT)
1046 		create_options |= CREATE_NO_BUFFER;
1047 
1048 	if (server->ops->get_lease_key)
1049 		server->ops->get_lease_key(inode, &cfile->fid);
1050 
1051 retry_open:
1052 	oparms = (struct cifs_open_parms) {
1053 		.tcon = tcon,
1054 		.cifs_sb = cifs_sb,
1055 		.desired_access = desired_access,
1056 		.create_options = cifs_create_options(cifs_sb, create_options),
1057 		.disposition = disposition,
1058 		.path = full_path,
1059 		.fid = &cfile->fid,
1060 		.reconnect = true,
1061 	};
1062 
1063 	/*
1064 	 * Cannot refresh the inode by passing in a file_info buf to be returned
1065 	 * by ops->open and then calling get_inode_info with the returned buf,
1066 	 * since the file might have write-behind data that needs to be flushed
1067 	 * and the server's version of the file size can be stale. If we knew
1068 	 * for sure that the inode was not dirty locally, we could do this.
1069 	 */
1070 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1071 	if (rc == -ENOENT && oparms.reconnect == false) {
1072 		/* durable handle timeout is expired - open the file again */
1073 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1074 		/* indicate that we need to relock the file */
1075 		oparms.reconnect = true;
1076 	}
1077 	if (rc == -EACCES && rdwr_for_fscache == 1) {
1078 		desired_access = cifs_convert_flags(cfile->f_flags, 0);
1079 		rdwr_for_fscache = 2;
1080 		goto retry_open;
1081 	}
1082 
1083 	if (rc) {
1084 		mutex_unlock(&cfile->fh_mutex);
1085 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1086 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1087 		goto reopen_error_exit;
1088 	}
1089 
1090 	if (rdwr_for_fscache == 2)
1091 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
1092 
1093 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1094 reopen_success:
1095 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1096 	cfile->invalidHandle = false;
1097 	mutex_unlock(&cfile->fh_mutex);
1098 	cinode = CIFS_I(inode);
1099 
1100 	if (can_flush) {
1101 		rc = filemap_write_and_wait(inode->i_mapping);
1102 		if (!is_interrupt_error(rc))
1103 			mapping_set_error(inode->i_mapping, rc);
1104 
1105 		if (tcon->posix_extensions) {
1106 			rc = smb311_posix_get_inode_info(&inode, full_path,
1107 							 NULL, inode->i_sb, xid);
1108 		} else if (tcon->unix_ext) {
1109 			rc = cifs_get_inode_info_unix(&inode, full_path,
1110 						      inode->i_sb, xid);
1111 		} else {
1112 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1113 						 inode->i_sb, xid, NULL);
1114 		}
1115 	}
1116 	/*
1117 	 * Else we are already writing out data to the server and could deadlock
1118 	 * if we tried to flush it; and since we do not know whether we have data
1119 	 * that would invalidate the current end of file on the server, we cannot
1120 	 * go to the server to get the new inode info.
1121 	 */
1122 
1123 	/*
1124 	 * If the server returned a read oplock and we have mandatory brlocks,
1125 	 * set oplock level to None.
1126 	 */
1127 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1128 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1129 		oplock = 0;
1130 	}
1131 
1132 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1133 	if (oparms.reconnect)
1134 		cifs_relock_file(cfile);
1135 
1136 reopen_error_exit:
1137 	free_dentry_path(page);
1138 	free_xid(xid);
1139 	return rc;
1140 }
1141 
1142 void smb2_deferred_work_close(struct work_struct *work)
1143 {
1144 	struct cifsFileInfo *cfile = container_of(work,
1145 			struct cifsFileInfo, deferred.work);
1146 
1147 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1148 	cifs_del_deferred_close(cfile);
1149 	cfile->deferred_close_scheduled = false;
1150 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1151 	_cifsFileInfo_put(cfile, true, false);
1152 }
1153 
1154 static bool
1155 smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
1156 {
1157 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1158 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1159 
1160 	return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
1161 			(cinode->oplock == CIFS_CACHE_RHW_FLG ||
1162 			 cinode->oplock == CIFS_CACHE_RH_FLG) &&
1163 			!test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
1164 
1165 }
1166 
1167 int cifs_close(struct inode *inode, struct file *file)
1168 {
1169 	struct cifsFileInfo *cfile;
1170 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1171 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1172 	struct cifs_deferred_close *dclose;
1173 
1174 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1175 
1176 	if (file->private_data != NULL) {
1177 		cfile = file->private_data;
1178 		file->private_data = NULL;
1179 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1180 		if ((cfile->status_file_deleted == false) &&
1181 		    (smb2_can_defer_close(inode, dclose))) {
1182 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1183 				inode_set_mtime_to_ts(inode,
1184 						      inode_set_ctime_current(inode));
1185 			}
1186 			spin_lock(&cinode->deferred_lock);
1187 			cifs_add_deferred_close(cfile, dclose);
1188 			if (cfile->deferred_close_scheduled &&
1189 			    delayed_work_pending(&cfile->deferred)) {
1190 				/*
1191 				 * If there is no pending work, mod_delayed_work queues new work.
1192 				 * So, increase the ref count to avoid use-after-free.
1193 				 */
1194 				if (!mod_delayed_work(deferredclose_wq,
1195 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1196 					cifsFileInfo_get(cfile);
1197 			} else {
1198 				/* Deferred close for files */
1199 				queue_delayed_work(deferredclose_wq,
1200 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1201 				cfile->deferred_close_scheduled = true;
1202 				spin_unlock(&cinode->deferred_lock);
1203 				return 0;
1204 			}
1205 			spin_unlock(&cinode->deferred_lock);
1206 			_cifsFileInfo_put(cfile, true, false);
1207 		} else {
1208 			_cifsFileInfo_put(cfile, true, false);
1209 			kfree(dclose);
1210 		}
1211 	}
1212 
1213 	/* return code from the ->release op is always ignored */
1214 	return 0;
1215 }
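/*
 * Deferred close, as implemented above: if the handle still has a read or
 * read/handle lease (CIFS_CACHE_RH_FLG / CIFS_CACHE_RHW_FLG), the file was
 * not deleted, and closetimeo is non-zero, the SMB close is postponed via
 * delayed work so that a quick re-open of the same path can reuse the cached
 * handle (see the cifs_get_readable_path() check in cifs_open()).
 */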
1216 
1217 void
1218 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1219 {
1220 	struct cifsFileInfo *open_file, *tmp;
1221 	struct list_head tmp_list;
1222 
1223 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1224 		return;
1225 
1226 	tcon->need_reopen_files = false;
1227 
1228 	cifs_dbg(FYI, "Reopen persistent handles\n");
1229 	INIT_LIST_HEAD(&tmp_list);
1230 
1231 	/* list all files open on tree connection, reopen resilient handles */
1232 	spin_lock(&tcon->open_file_lock);
1233 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1234 		if (!open_file->invalidHandle)
1235 			continue;
1236 		cifsFileInfo_get(open_file);
1237 		list_add_tail(&open_file->rlist, &tmp_list);
1238 	}
1239 	spin_unlock(&tcon->open_file_lock);
1240 
1241 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1242 		if (cifs_reopen_file(open_file, false /* do not flush */))
1243 			tcon->need_reopen_files = true;
1244 		list_del_init(&open_file->rlist);
1245 		cifsFileInfo_put(open_file);
1246 	}
1247 }
1248 
1249 int cifs_closedir(struct inode *inode, struct file *file)
1250 {
1251 	int rc = 0;
1252 	unsigned int xid;
1253 	struct cifsFileInfo *cfile = file->private_data;
1254 	struct cifs_tcon *tcon;
1255 	struct TCP_Server_Info *server;
1256 	char *buf;
1257 
1258 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1259 
1260 	if (cfile == NULL)
1261 		return rc;
1262 
1263 	xid = get_xid();
1264 	tcon = tlink_tcon(cfile->tlink);
1265 	server = tcon->ses->server;
1266 
1267 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1268 	spin_lock(&cfile->file_info_lock);
1269 	if (server->ops->dir_needs_close(cfile)) {
1270 		cfile->invalidHandle = true;
1271 		spin_unlock(&cfile->file_info_lock);
1272 		if (server->ops->close_dir)
1273 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1274 		else
1275 			rc = -ENOSYS;
1276 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1277 		/* not much we can do if it fails anyway, ignore rc */
1278 		rc = 0;
1279 	} else
1280 		spin_unlock(&cfile->file_info_lock);
1281 
1282 	buf = cfile->srch_inf.ntwrk_buf_start;
1283 	if (buf) {
1284 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1285 		cfile->srch_inf.ntwrk_buf_start = NULL;
1286 		if (cfile->srch_inf.smallBuf)
1287 			cifs_small_buf_release(buf);
1288 		else
1289 			cifs_buf_release(buf);
1290 	}
1291 
1292 	cifs_put_tlink(cfile->tlink);
1293 	kfree(file->private_data);
1294 	file->private_data = NULL;
1295 	/* BB can we lock the filestruct while this is going on? */
1296 	free_xid(xid);
1297 	return rc;
1298 }
1299 
1300 static struct cifsLockInfo *
1301 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1302 {
1303 	struct cifsLockInfo *lock =
1304 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1305 	if (!lock)
1306 		return lock;
1307 	lock->offset = offset;
1308 	lock->length = length;
1309 	lock->type = type;
1310 	lock->pid = current->tgid;
1311 	lock->flags = flags;
1312 	INIT_LIST_HEAD(&lock->blist);
1313 	init_waitqueue_head(&lock->block_q);
1314 	return lock;
1315 }
1316 
1317 void
1318 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1319 {
1320 	struct cifsLockInfo *li, *tmp;
1321 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1322 		list_del_init(&li->blist);
1323 		wake_up(&li->block_q);
1324 	}
1325 }
1326 
1327 #define CIFS_LOCK_OP	0
1328 #define CIFS_READ_OP	1
1329 #define CIFS_WRITE_OP	2
1330 
1331 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1332 static bool
1333 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1334 			    __u64 length, __u8 type, __u16 flags,
1335 			    struct cifsFileInfo *cfile,
1336 			    struct cifsLockInfo **conf_lock, int rw_check)
1337 {
1338 	struct cifsLockInfo *li;
1339 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1340 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1341 
1342 	list_for_each_entry(li, &fdlocks->locks, llist) {
1343 		if (offset + length <= li->offset ||
1344 		    offset >= li->offset + li->length)
1345 			continue;
1346 		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1347 		    server->ops->compare_fids(cfile, cur_cfile)) {
1348 			/* shared lock prevents write op through the same fid */
1349 			if (!(li->type & server->vals->shared_lock_type) ||
1350 			    rw_check != CIFS_WRITE_OP)
1351 				continue;
1352 		}
1353 		if ((type & server->vals->shared_lock_type) &&
1354 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1355 		     current->tgid == li->pid) || type == li->type))
1356 			continue;
1357 		if (rw_check == CIFS_LOCK_OP &&
1358 		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1359 		    server->ops->compare_fids(cfile, cur_cfile))
1360 			continue;
1361 		if (conf_lock)
1362 			*conf_lock = li;
1363 		return true;
1364 	}
1365 	return false;
1366 }
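/*
 * Summary of the conflict rules above: an existing lock conflicts only if
 * its byte range overlaps the requested one and none of the exemptions
 * apply: for read/write checks our own lock (same fid and tgid) is ignored
 * unless it is a shared lock blocking a write; a shared request does not
 * conflict with our own lock or with a lock of identical type; and for
 * lock requests two OFD locks on the same fid do not conflict.
 */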
1367 
1368 bool
1369 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1370 			__u8 type, __u16 flags,
1371 			struct cifsLockInfo **conf_lock, int rw_check)
1372 {
1373 	bool rc = false;
1374 	struct cifs_fid_locks *cur;
1375 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1376 
1377 	list_for_each_entry(cur, &cinode->llist, llist) {
1378 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1379 						 flags, cfile, conf_lock,
1380 						 rw_check);
1381 		if (rc)
1382 			break;
1383 	}
1384 
1385 	return rc;
1386 }
1387 
1388 /*
1389  * Check if there is another lock that prevents us from setting the lock
1390  * (mandatory style). If such a lock exists, update the flock structure with
1391  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1392  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1393  * request it from the server, or 1 otherwise.
1394  */
1395 static int
1396 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1397 	       __u8 type, struct file_lock *flock)
1398 {
1399 	int rc = 0;
1400 	struct cifsLockInfo *conf_lock;
1401 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1402 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1403 	bool exist;
1404 
1405 	down_read(&cinode->lock_sem);
1406 
1407 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1408 					flock->c.flc_flags, &conf_lock,
1409 					CIFS_LOCK_OP);
1410 	if (exist) {
1411 		flock->fl_start = conf_lock->offset;
1412 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1413 		flock->c.flc_pid = conf_lock->pid;
1414 		if (conf_lock->type & server->vals->shared_lock_type)
1415 			flock->c.flc_type = F_RDLCK;
1416 		else
1417 			flock->c.flc_type = F_WRLCK;
1418 	} else if (!cinode->can_cache_brlcks)
1419 		rc = 1;
1420 	else
1421 		flock->c.flc_type = F_UNLCK;
1422 
1423 	up_read(&cinode->lock_sem);
1424 	return rc;
1425 }
1426 
1427 static void
1428 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1429 {
1430 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1431 	cifs_down_write(&cinode->lock_sem);
1432 	list_add_tail(&lock->llist, &cfile->llist->locks);
1433 	up_write(&cinode->lock_sem);
1434 }
1435 
1436 /*
1437  * Set the byte-range lock (mandatory style). Returns:
1438  * 1) 0, if we set the lock and don't need to request to the server;
1439  * 2) 1, if no locks prevent us but we need to request to the server;
1440  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1441  */
1442 static int
1443 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1444 		 bool wait)
1445 {
1446 	struct cifsLockInfo *conf_lock;
1447 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1448 	bool exist;
1449 	int rc = 0;
1450 
1451 try_again:
1452 	exist = false;
1453 	cifs_down_write(&cinode->lock_sem);
1454 
1455 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1456 					lock->type, lock->flags, &conf_lock,
1457 					CIFS_LOCK_OP);
1458 	if (!exist && cinode->can_cache_brlcks) {
1459 		list_add_tail(&lock->llist, &cfile->llist->locks);
1460 		up_write(&cinode->lock_sem);
1461 		return rc;
1462 	}
1463 
1464 	if (!exist)
1465 		rc = 1;
1466 	else if (!wait)
1467 		rc = -EACCES;
1468 	else {
1469 		list_add_tail(&lock->blist, &conf_lock->blist);
1470 		up_write(&cinode->lock_sem);
1471 		rc = wait_event_interruptible(lock->block_q,
1472 					(lock->blist.prev == &lock->blist) &&
1473 					(lock->blist.next == &lock->blist));
1474 		if (!rc)
1475 			goto try_again;
1476 		cifs_down_write(&cinode->lock_sem);
1477 		list_del_init(&lock->blist);
1478 	}
1479 
1480 	up_write(&cinode->lock_sem);
1481 	return rc;
1482 }
1483 
1484 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1485 /*
1486  * Check if there is another lock that prevents us from setting the lock
1487  * (posix style). If such a lock exists, update the flock structure with
1488  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1489  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1490  * request it from the server, or 1 otherwise.
1491  */
1492 static int
1493 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1494 {
1495 	int rc = 0;
1496 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1497 	unsigned char saved_type = flock->c.flc_type;
1498 
1499 	if ((flock->c.flc_flags & FL_POSIX) == 0)
1500 		return 1;
1501 
1502 	down_read(&cinode->lock_sem);
1503 	posix_test_lock(file, flock);
1504 
1505 	if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) {
1506 		flock->c.flc_type = saved_type;
1507 		rc = 1;
1508 	}
1509 
1510 	up_read(&cinode->lock_sem);
1511 	return rc;
1512 }
1513 
1514 /*
1515  * Set the byte-range lock (posix style). Returns:
1516  * 1) <0, if the error occurs while setting the lock;
1517  * 2) 0, if we set the lock and don't need to request to the server;
1518  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1519  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1520  */
1521 static int
1522 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1523 {
1524 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1525 	int rc = FILE_LOCK_DEFERRED + 1;
1526 
1527 	if ((flock->c.flc_flags & FL_POSIX) == 0)
1528 		return rc;
1529 
1530 	cifs_down_write(&cinode->lock_sem);
1531 	if (!cinode->can_cache_brlcks) {
1532 		up_write(&cinode->lock_sem);
1533 		return rc;
1534 	}
1535 
1536 	rc = posix_lock_file(file, flock, NULL);
1537 	up_write(&cinode->lock_sem);
1538 	return rc;
1539 }
1540 
1541 int
1542 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1543 {
1544 	unsigned int xid;
1545 	int rc = 0, stored_rc;
1546 	struct cifsLockInfo *li, *tmp;
1547 	struct cifs_tcon *tcon;
1548 	unsigned int num, max_num, max_buf;
1549 	LOCKING_ANDX_RANGE *buf, *cur;
1550 	static const int types[] = {
1551 		LOCKING_ANDX_LARGE_FILES,
1552 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1553 	};
1554 	int i;
1555 
1556 	xid = get_xid();
1557 	tcon = tlink_tcon(cfile->tlink);
1558 
1559 	/*
1560 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1561 	 * and check it before using.
1562 	 */
1563 	max_buf = tcon->ses->server->maxBuf;
1564 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1565 		free_xid(xid);
1566 		return -EINVAL;
1567 	}
1568 
1569 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1570 		     PAGE_SIZE);
1571 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1572 			PAGE_SIZE);
1573 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1574 						sizeof(LOCKING_ANDX_RANGE);
1575 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1576 	if (!buf) {
1577 		free_xid(xid);
1578 		return -ENOMEM;
1579 	}
1580 
1581 	for (i = 0; i < 2; i++) {
1582 		cur = buf;
1583 		num = 0;
1584 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1585 			if (li->type != types[i])
1586 				continue;
1587 			cur->Pid = cpu_to_le16(li->pid);
1588 			cur->LengthLow = cpu_to_le32((u32)li->length);
1589 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1590 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1591 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1592 			if (++num == max_num) {
1593 				stored_rc = cifs_lockv(xid, tcon,
1594 						       cfile->fid.netfid,
1595 						       (__u8)li->type, 0, num,
1596 						       buf);
1597 				if (stored_rc)
1598 					rc = stored_rc;
1599 				cur = buf;
1600 				num = 0;
1601 			} else
1602 				cur++;
1603 		}
1604 
1605 		if (num) {
1606 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1607 					       (__u8)types[i], 0, num, buf);
1608 			if (stored_rc)
1609 				rc = stored_rc;
1610 		}
1611 	}
1612 
1613 	kfree(buf);
1614 	free_xid(xid);
1615 	return rc;
1616 }
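/*
 * The push above batches the cached byte-range locks: LOCKING_ANDX_RANGE
 * entries are packed into a buffer bounded by the server's maxBuf (capped
 * at PAGE_SIZE), sent max_num at a time, with one pass for exclusive
 * (LOCKING_ANDX_LARGE_FILES) locks and one for shared locks.
 */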
1617 
1618 static __u32
1619 hash_lockowner(fl_owner_t owner)
1620 {
1621 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1622 }
1623 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1624 
1625 struct lock_to_push {
1626 	struct list_head llist;
1627 	__u64 offset;
1628 	__u64 length;
1629 	__u32 pid;
1630 	__u16 netfid;
1631 	__u8 type;
1632 };
1633 
1634 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1635 static int
1636 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1637 {
1638 	struct inode *inode = d_inode(cfile->dentry);
1639 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1640 	struct file_lock *flock;
1641 	struct file_lock_context *flctx = locks_inode_context(inode);
1642 	unsigned int count = 0, i;
1643 	int rc = 0, xid, type;
1644 	struct list_head locks_to_send, *el;
1645 	struct lock_to_push *lck, *tmp;
1646 	__u64 length;
1647 
1648 	xid = get_xid();
1649 
1650 	if (!flctx)
1651 		goto out;
1652 
1653 	spin_lock(&flctx->flc_lock);
1654 	list_for_each(el, &flctx->flc_posix) {
1655 		count++;
1656 	}
1657 	spin_unlock(&flctx->flc_lock);
1658 
1659 	INIT_LIST_HEAD(&locks_to_send);
1660 
1661 	/*
1662 	 * Allocating count locks is enough because no FL_POSIX locks can be
1663 	 * added to the list while we hold cinode->lock_sem, which protects
1664 	 * locking operations on this inode.
1665 	 */
1666 	for (i = 0; i < count; i++) {
1667 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1668 		if (!lck) {
1669 			rc = -ENOMEM;
1670 			goto err_out;
1671 		}
1672 		list_add_tail(&lck->llist, &locks_to_send);
1673 	}
1674 
1675 	el = locks_to_send.next;
1676 	spin_lock(&flctx->flc_lock);
1677 	for_each_file_lock(flock, &flctx->flc_posix) {
1678 		unsigned char ftype = flock->c.flc_type;
1679 
1680 		if (el == &locks_to_send) {
1681 			/*
1682 			 * The list ended. We don't have enough allocated
1683 			 * structures - something is really wrong.
1684 			 */
1685 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1686 			break;
1687 		}
1688 		length = cifs_flock_len(flock);
1689 		if (ftype == F_RDLCK || ftype == F_SHLCK)
1690 			type = CIFS_RDLCK;
1691 		else
1692 			type = CIFS_WRLCK;
1693 		lck = list_entry(el, struct lock_to_push, llist);
1694 		lck->pid = hash_lockowner(flock->c.flc_owner);
1695 		lck->netfid = cfile->fid.netfid;
1696 		lck->length = length;
1697 		lck->type = type;
1698 		lck->offset = flock->fl_start;
1699 	}
1700 	spin_unlock(&flctx->flc_lock);
1701 
1702 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1703 		int stored_rc;
1704 
1705 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1706 					     lck->offset, lck->length, NULL,
1707 					     lck->type, 0);
1708 		if (stored_rc)
1709 			rc = stored_rc;
1710 		list_del(&lck->llist);
1711 		kfree(lck);
1712 	}
1713 
1714 out:
1715 	free_xid(xid);
1716 	return rc;
1717 err_out:
1718 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1719 		list_del(&lck->llist);
1720 		kfree(lck);
1721 	}
1722 	goto out;
1723 }
1724 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1725 
1726 static int
1727 cifs_push_locks(struct cifsFileInfo *cfile)
1728 {
1729 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1730 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1731 	int rc = 0;
1732 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1733 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1734 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1735 
1736 	/* we are going to update can_cache_brlcks here - need write access */
1737 	cifs_down_write(&cinode->lock_sem);
1738 	if (!cinode->can_cache_brlcks) {
1739 		up_write(&cinode->lock_sem);
1740 		return rc;
1741 	}
1742 
1743 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1744 	if (cap_unix(tcon->ses) &&
1745 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1746 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1747 		rc = cifs_push_posix_locks(cfile);
1748 	else
1749 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1750 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1751 
1752 	cinode->can_cache_brlcks = false;
1753 	up_write(&cinode->lock_sem);
1754 	return rc;
1755 }
1756 
1757 static void
1758 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1759 		bool *wait_flag, struct TCP_Server_Info *server)
1760 {
1761 	if (flock->c.flc_flags & FL_POSIX)
1762 		cifs_dbg(FYI, "Posix\n");
1763 	if (flock->c.flc_flags & FL_FLOCK)
1764 		cifs_dbg(FYI, "Flock\n");
1765 	if (flock->c.flc_flags & FL_SLEEP) {
1766 		cifs_dbg(FYI, "Blocking lock\n");
1767 		*wait_flag = true;
1768 	}
1769 	if (flock->c.flc_flags & FL_ACCESS)
1770 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1771 	if (flock->c.flc_flags & FL_LEASE)
1772 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1773 	if (flock->c.flc_flags &
1774 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1775 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1776 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n",
1777 		         flock->c.flc_flags);
1778 
1779 	*type = server->vals->large_lock_type;
1780 	if (lock_is_write(flock)) {
1781 		cifs_dbg(FYI, "F_WRLCK\n");
1782 		*type |= server->vals->exclusive_lock_type;
1783 		*lock = 1;
1784 	} else if (lock_is_unlock(flock)) {
1785 		cifs_dbg(FYI, "F_UNLCK\n");
1786 		*type |= server->vals->unlock_lock_type;
1787 		*unlock = 1;
1788 		/* Check if unlock includes more than one lock range */
1789 	} else if (lock_is_read(flock)) {
1790 		cifs_dbg(FYI, "F_RDLCK\n");
1791 		*type |= server->vals->shared_lock_type;
1792 		*lock = 1;
1793 	} else if (flock->c.flc_type == F_EXLCK) {
1794 		cifs_dbg(FYI, "F_EXLCK\n");
1795 		*type |= server->vals->exclusive_lock_type;
1796 		*lock = 1;
1797 	} else if (flock->c.flc_type == F_SHLCK) {
1798 		cifs_dbg(FYI, "F_SHLCK\n");
1799 		*type |= server->vals->shared_lock_type;
1800 		*lock = 1;
1801 	} else
1802 		cifs_dbg(FYI, "Unknown type of lock\n");
1803 }
1804 
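/*
 * Handle an F_GETLK request: check locally cached locks first, then probe
 * the server by briefly setting and releasing a lock on the range to find
 * out whether a conflicting lock exists.
 */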
1805 static int
1806 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1807 	   bool wait_flag, bool posix_lck, unsigned int xid)
1808 {
1809 	int rc = 0;
1810 	__u64 length = cifs_flock_len(flock);
1811 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1812 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1813 	struct TCP_Server_Info *server = tcon->ses->server;
1814 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1815 	__u16 netfid = cfile->fid.netfid;
1816 
1817 	if (posix_lck) {
1818 		int posix_lock_type;
1819 
1820 		rc = cifs_posix_lock_test(file, flock);
1821 		if (!rc)
1822 			return rc;
1823 
1824 		if (type & server->vals->shared_lock_type)
1825 			posix_lock_type = CIFS_RDLCK;
1826 		else
1827 			posix_lock_type = CIFS_WRLCK;
1828 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1829 				      hash_lockowner(flock->c.flc_owner),
1830 				      flock->fl_start, length, flock,
1831 				      posix_lock_type, wait_flag);
1832 		return rc;
1833 	}
1834 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1835 
1836 	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1837 	if (!rc)
1838 		return rc;
1839 
1840 	/* BB we could chain these into one lock request BB */
1841 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1842 				    1, 0, false);
1843 	if (rc == 0) {
1844 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1845 					    type, 0, 1, false);
1846 		flock->c.flc_type = F_UNLCK;
1847 		if (rc != 0)
1848 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1849 				 rc);
1850 		return 0;
1851 	}
1852 
1853 	if (type & server->vals->shared_lock_type) {
1854 		flock->c.flc_type = F_WRLCK;
1855 		return 0;
1856 	}
1857 
1858 	type &= ~server->vals->exclusive_lock_type;
1859 
1860 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1861 				    type | server->vals->shared_lock_type,
1862 				    1, 0, false);
1863 	if (rc == 0) {
1864 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1865 			type | server->vals->shared_lock_type, 0, 1, false);
1866 		flock->c.flc_type = F_RDLCK;
1867 		if (rc != 0)
1868 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1869 				 rc);
1870 	} else
1871 		flock->c.flc_type = F_WRLCK;
1872 
1873 	return 0;
1874 }
1875 
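/* Move all lock entries from one list to another. */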
1876 void
1877 cifs_move_llist(struct list_head *source, struct list_head *dest)
1878 {
1879 	struct list_head *li, *tmp;
1880 	list_for_each_safe(li, tmp, source)
1881 		list_move(li, dest);
1882 }
1883 
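/* Wake any waiters on each lock entry, then unlink and free it. */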
1884 void
1885 cifs_free_llist(struct list_head *llist)
1886 {
1887 	struct cifsLockInfo *li, *tmp;
1888 	list_for_each_entry_safe(li, tmp, llist, llist) {
1889 		cifs_del_lock_waiters(li);
1890 		list_del(&li->llist);
1891 		kfree(li);
1892 	}
1893 }
1894 
1895 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
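/*
 * Send LOCKING_ANDX unlock requests for every cached byte-range lock that
 * falls inside the range being unlocked, batching as many ranges per request
 * as the server's buffer size allows.
 */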
1896 int
1897 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1898 		  unsigned int xid)
1899 {
1900 	int rc = 0, stored_rc;
1901 	static const int types[] = {
1902 		LOCKING_ANDX_LARGE_FILES,
1903 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1904 	};
1905 	unsigned int i;
1906 	unsigned int max_num, num, max_buf;
1907 	LOCKING_ANDX_RANGE *buf, *cur;
1908 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1909 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1910 	struct cifsLockInfo *li, *tmp;
1911 	__u64 length = cifs_flock_len(flock);
1912 	struct list_head tmp_llist;
1913 
1914 	INIT_LIST_HEAD(&tmp_llist);
1915 
1916 	/*
1917 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1918 	 * and check it before using.
1919 	 */
1920 	max_buf = tcon->ses->server->maxBuf;
1921 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1922 		return -EINVAL;
1923 
1924 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1925 		     PAGE_SIZE);
1926 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1927 			PAGE_SIZE);
1928 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1929 						sizeof(LOCKING_ANDX_RANGE);
1930 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1931 	if (!buf)
1932 		return -ENOMEM;
1933 
1934 	cifs_down_write(&cinode->lock_sem);
1935 	for (i = 0; i < 2; i++) {
1936 		cur = buf;
1937 		num = 0;
1938 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1939 			if (flock->fl_start > li->offset ||
1940 			    (flock->fl_start + length) <
1941 			    (li->offset + li->length))
1942 				continue;
1943 			if (current->tgid != li->pid)
1944 				continue;
1945 			if (types[i] != li->type)
1946 				continue;
1947 			if (cinode->can_cache_brlcks) {
1948 				/*
1949 				 * We can cache brlock requests - simply remove
1950 				 * a lock from the file's list.
1951 				 */
1952 				list_del(&li->llist);
1953 				cifs_del_lock_waiters(li);
1954 				kfree(li);
1955 				continue;
1956 			}
1957 			cur->Pid = cpu_to_le16(li->pid);
1958 			cur->LengthLow = cpu_to_le32((u32)li->length);
1959 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1960 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1961 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1962 			/*
1963 			 * We need to save a lock here to let us add it again to
1964 			 * the file's list if the unlock range request fails on
1965 			 * the server.
1966 			 */
1967 			list_move(&li->llist, &tmp_llist);
1968 			if (++num == max_num) {
1969 				stored_rc = cifs_lockv(xid, tcon,
1970 						       cfile->fid.netfid,
1971 						       li->type, num, 0, buf);
1972 				if (stored_rc) {
1973 					/*
1974 					 * We failed on the unlock range
1975 					 * request - add all locks from the tmp
1976 					 * list to the head of the file's list.
1977 					 */
1978 					cifs_move_llist(&tmp_llist,
1979 							&cfile->llist->locks);
1980 					rc = stored_rc;
1981 				} else
1982 					/*
1983 					 * The unlock range request succeed -
1984 					 * The unlock range request succeeded -
1985 					 */
1986 					cifs_free_llist(&tmp_llist);
1987 				cur = buf;
1988 				num = 0;
1989 			} else
1990 				cur++;
1991 		}
1992 		if (num) {
1993 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1994 					       types[i], num, 0, buf);
1995 			if (stored_rc) {
1996 				cifs_move_llist(&tmp_llist,
1997 						&cfile->llist->locks);
1998 				rc = stored_rc;
1999 			} else
2000 				cifs_free_llist(&tmp_llist);
2001 		}
2002 	}
2003 
2004 	up_write(&cinode->lock_sem);
2005 	kfree(buf);
2006 	return rc;
2007 }
2008 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2009 
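/*
 * Apply or remove a byte-range lock: use POSIX lock semantics when posix_lck
 * is set, otherwise fall back to mandatory (Windows-style) locks, caching the
 * new lock locally on success.
 */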
2010 static int
2011 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
2012 	   bool wait_flag, bool posix_lck, int lock, int unlock,
2013 	   unsigned int xid)
2014 {
2015 	int rc = 0;
2016 	__u64 length = cifs_flock_len(flock);
2017 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2018 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2019 	struct TCP_Server_Info *server = tcon->ses->server;
2020 	struct inode *inode = d_inode(cfile->dentry);
2021 
2022 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2023 	if (posix_lck) {
2024 		int posix_lock_type;
2025 
2026 		rc = cifs_posix_lock_set(file, flock);
2027 		if (rc <= FILE_LOCK_DEFERRED)
2028 			return rc;
2029 
2030 		if (type & server->vals->shared_lock_type)
2031 			posix_lock_type = CIFS_RDLCK;
2032 		else
2033 			posix_lock_type = CIFS_WRLCK;
2034 
2035 		if (unlock == 1)
2036 			posix_lock_type = CIFS_UNLCK;
2037 
2038 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2039 				      hash_lockowner(flock->c.flc_owner),
2040 				      flock->fl_start, length,
2041 				      NULL, posix_lock_type, wait_flag);
2042 		goto out;
2043 	}
2044 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2045 	if (lock) {
2046 		struct cifsLockInfo *lock;
2047 
2048 		lock = cifs_lock_init(flock->fl_start, length, type,
2049 				      flock->c.flc_flags);
2050 		if (!lock)
2051 			return -ENOMEM;
2052 
2053 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
2054 		if (rc < 0) {
2055 			kfree(lock);
2056 			return rc;
2057 		}
2058 		if (!rc)
2059 			goto out;
2060 
2061 		/*
2062 		 * Windows 7 server can delay breaking lease from read to None
2063 		 * if we set a byte-range lock on a file - break it explicitly
2064 		 * before sending the lock to the server to be sure the next
2065 		 * read won't conflict with non-overlapping locks due to
2066 		 * page reading.
2067 		 */
2068 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2069 					CIFS_CACHE_READ(CIFS_I(inode))) {
2070 			cifs_zap_mapping(inode);
2071 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2072 				 inode);
2073 			CIFS_I(inode)->oplock = 0;
2074 		}
2075 
2076 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2077 					    type, 1, 0, wait_flag);
2078 		if (rc) {
2079 			kfree(lock);
2080 			return rc;
2081 		}
2082 
2083 		cifs_lock_add(cfile, lock);
2084 	} else if (unlock)
2085 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
2086 
2087 out:
2088 	if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) {
2089 		/*
2090 		 * If this is a request to remove all locks because we
2091 		 * are closing the file, it doesn't matter if the
2092 		 * unlocking failed as both cifs.ko and the SMB server
2093 		 * remove the lock on file close
2094 		 */
2095 		if (rc) {
2096 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2097 			if (!(flock->c.flc_flags & FL_CLOSE))
2098 				return rc;
2099 		}
2100 		rc = locks_lock_file_wait(file, flock);
2101 	}
2102 	return rc;
2103 }
2104 
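/*
 * flock(2) entry point: reject anything that is not an FL_FLOCK request and
 * hand the rest to cifs_setlk().
 */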
2105 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2106 {
2107 	int rc, xid;
2108 	int lock = 0, unlock = 0;
2109 	bool wait_flag = false;
2110 	bool posix_lck = false;
2111 	struct cifs_sb_info *cifs_sb;
2112 	struct cifs_tcon *tcon;
2113 	struct cifsFileInfo *cfile;
2114 	__u32 type;
2115 
2116 	xid = get_xid();
2117 
2118 	if (!(fl->c.flc_flags & FL_FLOCK)) {
2119 		rc = -ENOLCK;
2120 		free_xid(xid);
2121 		return rc;
2122 	}
2123 
2124 	cfile = (struct cifsFileInfo *)file->private_data;
2125 	tcon = tlink_tcon(cfile->tlink);
2126 
2127 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2128 			tcon->ses->server);
2129 	cifs_sb = CIFS_FILE_SB(file);
2130 
2131 	if (cap_unix(tcon->ses) &&
2132 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2133 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2134 		posix_lck = true;
2135 
2136 	if (!lock && !unlock) {
2137 		/*
2138 		 * if this is neither a lock nor an unlock request then there is
2139 		 * nothing to do since we do not know what it is
2140 		 */
2141 		rc = -EOPNOTSUPP;
2142 		free_xid(xid);
2143 		return rc;
2144 	}
2145 
2146 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2147 			xid);
2148 	free_xid(xid);
2149 	return rc;
2152 }
2153 
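/*
 * Byte-range lock entry point for fcntl(F_GETLK/F_SETLK/F_SETLKW) requests.
 */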
2154 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2155 {
2156 	int rc, xid;
2157 	int lock = 0, unlock = 0;
2158 	bool wait_flag = false;
2159 	bool posix_lck = false;
2160 	struct cifs_sb_info *cifs_sb;
2161 	struct cifs_tcon *tcon;
2162 	struct cifsFileInfo *cfile;
2163 	__u32 type;
2164 
2165 	rc = -EACCES;
2166 	xid = get_xid();
2167 
2168 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2169 		 flock->c.flc_flags, flock->c.flc_type,
2170 		 (long long)flock->fl_start,
2171 		 (long long)flock->fl_end);
2172 
2173 	cfile = (struct cifsFileInfo *)file->private_data;
2174 	tcon = tlink_tcon(cfile->tlink);
2175 
2176 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2177 			tcon->ses->server);
2178 	cifs_sb = CIFS_FILE_SB(file);
2179 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2180 
2181 	if (cap_unix(tcon->ses) &&
2182 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2183 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2184 		posix_lck = true;
2185 	/*
2186 	 * BB add code here to normalize offset and length to account for
2187 	 * negative length which we cannot accept over the wire.
2188 	 */
2189 	if (IS_GETLK(cmd)) {
2190 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2191 		free_xid(xid);
2192 		return rc;
2193 	}
2194 
2195 	if (!lock && !unlock) {
2196 		/*
2197 		 * if this is neither a lock nor an unlock request then there is
2198 		 * nothing to do since we do not know what it is
2199 		 */
2200 		free_xid(xid);
2201 		return -EOPNOTSUPP;
2202 	}
2203 
2204 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2205 			xid);
2206 	free_xid(xid);
2207 	return rc;
2208 }
2209 
2210 /*
2211  * Update the file size (if needed) after a write. Should be called with
2212  * the inode->i_lock held.
2213  */
2214 void
2215 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2216 		      unsigned int bytes_written)
2217 {
2218 	loff_t end_of_write = offset + bytes_written;
2219 
2220 	if (end_of_write > cifsi->netfs.remote_i_size)
2221 		netfs_resize_file(&cifsi->netfs, end_of_write, true);
2222 }
2223 
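/*
 * Write data to the server synchronously, retrying on -EAGAIN and reopening
 * the handle if it has been invalidated, then update the cached file size.
 */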
2224 static ssize_t
2225 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2226 	   size_t write_size, loff_t *offset)
2227 {
2228 	int rc = 0;
2229 	unsigned int bytes_written = 0;
2230 	unsigned int total_written;
2231 	struct cifs_tcon *tcon;
2232 	struct TCP_Server_Info *server;
2233 	unsigned int xid;
2234 	struct dentry *dentry = open_file->dentry;
2235 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2236 	struct cifs_io_parms io_parms = {0};
2237 
2238 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2239 		 write_size, *offset, dentry);
2240 
2241 	tcon = tlink_tcon(open_file->tlink);
2242 	server = tcon->ses->server;
2243 
2244 	if (!server->ops->sync_write)
2245 		return -ENOSYS;
2246 
2247 	xid = get_xid();
2248 
2249 	for (total_written = 0; write_size > total_written;
2250 	     total_written += bytes_written) {
2251 		rc = -EAGAIN;
2252 		while (rc == -EAGAIN) {
2253 			struct kvec iov[2];
2254 			unsigned int len;
2255 
2256 			if (open_file->invalidHandle) {
2257 				/* we could deadlock if we called
2258 				   filemap_fdatawait from here so tell
2259 				   reopen_file not to flush data to
2260 				   server now */
2261 				rc = cifs_reopen_file(open_file, false);
2262 				if (rc != 0)
2263 					break;
2264 			}
2265 
2266 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2267 				  (unsigned int)write_size - total_written);
2268 			/* iov[0] is reserved for smb header */
2269 			iov[1].iov_base = (char *)write_data + total_written;
2270 			iov[1].iov_len = len;
2271 			io_parms.pid = pid;
2272 			io_parms.tcon = tcon;
2273 			io_parms.offset = *offset;
2274 			io_parms.length = len;
2275 			rc = server->ops->sync_write(xid, &open_file->fid,
2276 					&io_parms, &bytes_written, iov, 1);
2277 		}
2278 		if (rc || (bytes_written == 0)) {
2279 			if (total_written)
2280 				break;
2281 			else {
2282 				free_xid(xid);
2283 				return rc;
2284 			}
2285 		} else {
2286 			spin_lock(&d_inode(dentry)->i_lock);
2287 			cifs_update_eof(cifsi, *offset, bytes_written);
2288 			spin_unlock(&d_inode(dentry)->i_lock);
2289 			*offset += bytes_written;
2290 		}
2291 	}
2292 
2293 	cifs_stats_bytes_written(tcon, total_written);
2294 
2295 	if (total_written > 0) {
2296 		spin_lock(&d_inode(dentry)->i_lock);
2297 		if (*offset > d_inode(dentry)->i_size) {
2298 			i_size_write(d_inode(dentry), *offset);
2299 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2300 		}
2301 		spin_unlock(&d_inode(dentry)->i_lock);
2302 	}
2303 	mark_inode_dirty_sync(d_inode(dentry));
2304 	free_xid(xid);
2305 	return total_written;
2306 }
2307 
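/*
 * Find an open handle on this inode that was opened for reading, taking a
 * reference on it so it cannot be closed underneath the caller.
 */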
2308 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2309 					bool fsuid_only)
2310 {
2311 	struct cifsFileInfo *open_file = NULL;
2312 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2313 
2314 	/* only filter by fsuid on multiuser mounts */
2315 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2316 		fsuid_only = false;
2317 
2318 	spin_lock(&cifs_inode->open_file_lock);
2319 	/* we could simply take the first list entry since write-only entries
2320 	   are always at the end of the list, but since the first entry might
2321 	   have a close pending, we go through the whole list */
2322 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2323 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2324 			continue;
2325 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2326 			if (!open_file->invalidHandle) {
2327 				/* found a good file */
2328 				/* lock it so it will not be closed on us */
2329 				cifsFileInfo_get(open_file);
2330 				spin_unlock(&cifs_inode->open_file_lock);
2331 				return open_file;
2332 			} /* else might as well continue, and look for
2333 			     another, or simply have the caller reopen it
2334 			     again rather than trying to fix this handle */
2335 		} else /* write only file */
2336 			break; /* write only files are last so must be done */
2337 	}
2338 	spin_unlock(&cifs_inode->open_file_lock);
2339 	return NULL;
2340 }
2341 
2342 /* Return -EBADF if no handle is found and general rc otherwise */
2343 int
2344 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2345 		       struct cifsFileInfo **ret_file)
2346 {
2347 	struct cifsFileInfo *open_file, *inv_file = NULL;
2348 	struct cifs_sb_info *cifs_sb;
2349 	bool any_available = false;
2350 	int rc = -EBADF;
2351 	unsigned int refind = 0;
2352 	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2353 	bool with_delete = flags & FIND_WR_WITH_DELETE;
2354 	*ret_file = NULL;
2355 
2356 	/*
2357 	 * Having a null inode here (because mapping->host was set to zero by
2358 	 * the VFS or MM) should not happen but we had reports of an oops (due
2359 	 * to it being zero) during stress test cases so we need to check for it
2360 	 */
2361 
2362 	if (cifs_inode == NULL) {
2363 		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2364 		dump_stack();
2365 		return rc;
2366 	}
2367 
2368 	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2369 
2370 	/* only filter by fsuid on multiuser mounts */
2371 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2372 		fsuid_only = false;
2373 
2374 	spin_lock(&cifs_inode->open_file_lock);
2375 refind_writable:
2376 	if (refind > MAX_REOPEN_ATT) {
2377 		spin_unlock(&cifs_inode->open_file_lock);
2378 		return rc;
2379 	}
2380 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2381 		if (!any_available && open_file->pid != current->tgid)
2382 			continue;
2383 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2384 			continue;
2385 		if (with_delete && !(open_file->fid.access & DELETE))
2386 			continue;
2387 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2388 			if (!open_file->invalidHandle) {
2389 				/* found a good writable file */
2390 				cifsFileInfo_get(open_file);
2391 				spin_unlock(&cifs_inode->open_file_lock);
2392 				*ret_file = open_file;
2393 				return 0;
2394 			} else {
2395 				if (!inv_file)
2396 					inv_file = open_file;
2397 			}
2398 		}
2399 	}
2400 	/* couldn't find usable FH with same pid, try any available */
2401 	if (!any_available) {
2402 		any_available = true;
2403 		goto refind_writable;
2404 	}
2405 
2406 	if (inv_file) {
2407 		any_available = false;
2408 		cifsFileInfo_get(inv_file);
2409 	}
2410 
2411 	spin_unlock(&cifs_inode->open_file_lock);
2412 
2413 	if (inv_file) {
2414 		rc = cifs_reopen_file(inv_file, false);
2415 		if (!rc) {
2416 			*ret_file = inv_file;
2417 			return 0;
2418 		}
2419 
2420 		spin_lock(&cifs_inode->open_file_lock);
2421 		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2422 		spin_unlock(&cifs_inode->open_file_lock);
2423 		cifsFileInfo_put(inv_file);
2424 		++refind;
2425 		inv_file = NULL;
2426 		spin_lock(&cifs_inode->open_file_lock);
2427 		goto refind_writable;
2428 	}
2429 
2430 	return rc;
2431 }
2432 
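/*
 * Convenience wrapper around cifs_get_writable_file() that returns the handle
 * directly (or NULL on failure) instead of an error code.
 */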
2433 struct cifsFileInfo *
2434 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2435 {
2436 	struct cifsFileInfo *cfile;
2437 	int rc;
2438 
2439 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2440 	if (rc)
2441 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2442 
2443 	return cfile;
2444 }
2445 
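/*
 * Look up an open file on this tcon by pathname and return a writable handle
 * for it, or -ENOENT if no open file matches the path.
 */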
2446 int
2447 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2448 		       int flags,
2449 		       struct cifsFileInfo **ret_file)
2450 {
2451 	struct cifsFileInfo *cfile;
2452 	void *page = alloc_dentry_path();
2453 
2454 	*ret_file = NULL;
2455 
2456 	spin_lock(&tcon->open_file_lock);
2457 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2458 		struct cifsInodeInfo *cinode;
2459 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2460 		if (IS_ERR(full_path)) {
2461 			spin_unlock(&tcon->open_file_lock);
2462 			free_dentry_path(page);
2463 			return PTR_ERR(full_path);
2464 		}
2465 		if (strcmp(full_path, name))
2466 			continue;
2467 
2468 		cinode = CIFS_I(d_inode(cfile->dentry));
2469 		spin_unlock(&tcon->open_file_lock);
2470 		free_dentry_path(page);
2471 		return cifs_get_writable_file(cinode, flags, ret_file);
2472 	}
2473 
2474 	spin_unlock(&tcon->open_file_lock);
2475 	free_dentry_path(page);
2476 	return -ENOENT;
2477 }
2478 
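/*
 * Look up an open file on this tcon by pathname and return a readable handle
 * for it, or -ENOENT if no open file matches the path.
 */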
2479 int
2480 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2481 		       struct cifsFileInfo **ret_file)
2482 {
2483 	struct cifsFileInfo *cfile;
2484 	void *page = alloc_dentry_path();
2485 
2486 	*ret_file = NULL;
2487 
2488 	spin_lock(&tcon->open_file_lock);
2489 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2490 		struct cifsInodeInfo *cinode;
2491 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2492 		if (IS_ERR(full_path)) {
2493 			spin_unlock(&tcon->open_file_lock);
2494 			free_dentry_path(page);
2495 			return PTR_ERR(full_path);
2496 		}
2497 		if (strcmp(full_path, name))
2498 			continue;
2499 
2500 		cinode = CIFS_I(d_inode(cfile->dentry));
2501 		spin_unlock(&tcon->open_file_lock);
2502 		free_dentry_path(page);
2503 		*ret_file = find_readable_file(cinode, 0);
2504 		return *ret_file ? 0 : -ENOENT;
2505 	}
2506 
2507 	spin_unlock(&tcon->open_file_lock);
2508 	free_dentry_path(page);
2509 	return -ENOENT;
2510 }
2511 
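/*
 * Final kref release for write request data: drop any SMB-Direct memory
 * registration and the file handle reference, then free the structure.
 */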
2512 void
2513 cifs_writedata_release(struct kref *refcount)
2514 {
2515 	struct cifs_writedata *wdata = container_of(refcount,
2516 					struct cifs_writedata, refcount);
2517 #ifdef CONFIG_CIFS_SMB_DIRECT
2518 	if (wdata->mr) {
2519 		smbd_deregister_mr(wdata->mr);
2520 		wdata->mr = NULL;
2521 	}
2522 #endif
2523 
2524 	if (wdata->cfile)
2525 		cifsFileInfo_put(wdata->cfile);
2526 
2527 	kfree(wdata);
2528 }
2529 
2530 /*
2531  * Write failed with a retryable error. Resend the write request. It's also
2532  * possible that the page was redirtied so re-clean the page.
2533  */
2534 static void
2535 cifs_writev_requeue(struct cifs_writedata *wdata)
2536 {
2537 	int rc = 0;
2538 	struct inode *inode = d_inode(wdata->cfile->dentry);
2539 	struct TCP_Server_Info *server;
2540 	unsigned int rest_len = wdata->bytes;
2541 	loff_t fpos = wdata->offset;
2542 
2543 	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2544 	do {
2545 		struct cifs_writedata *wdata2;
2546 		unsigned int wsize, cur_len;
2547 
2548 		wsize = server->ops->wp_retry_size(inode);
2549 		if (wsize < rest_len) {
2550 			if (wsize < PAGE_SIZE) {
2551 				rc = -EOPNOTSUPP;
2552 				break;
2553 			}
2554 			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2555 		} else {
2556 			cur_len = rest_len;
2557 		}
2558 
2559 		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2560 		if (!wdata2) {
2561 			rc = -ENOMEM;
2562 			break;
2563 		}
2564 
2565 		wdata2->sync_mode = wdata->sync_mode;
2566 		wdata2->offset	= fpos;
2567 		wdata2->bytes	= cur_len;
2568 		wdata2->iter	= wdata->iter;
2569 
2570 		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2571 		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2572 
2573 		if (iov_iter_is_xarray(&wdata2->iter))
2574 			/* Check for pages having been redirtied and clean
2575 			 * them.  We can do this by walking the xarray.  If
2576 			 * it's not an xarray, then it's a DIO and we shouldn't
2577 			 * be mucking around with the page bits.
2578 			 */
2579 			cifs_undirty_folios(inode, fpos, cur_len);
2580 
2581 		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2582 					    &wdata2->cfile);
2583 		if (!wdata2->cfile) {
2584 			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2585 				 rc);
2586 			if (!is_retryable_error(rc))
2587 				rc = -EBADF;
2588 		} else {
2589 			wdata2->pid = wdata2->cfile->pid;
2590 			rc = server->ops->async_writev(wdata2,
2591 						       cifs_writedata_release);
2592 		}
2593 
2594 		kref_put(&wdata2->refcount, cifs_writedata_release);
2595 		if (rc) {
2596 			if (is_retryable_error(rc))
2597 				continue;
2598 			fpos += cur_len;
2599 			rest_len -= cur_len;
2600 			break;
2601 		}
2602 
2603 		fpos += cur_len;
2604 		rest_len -= cur_len;
2605 	} while (rest_len > 0);
2606 
2607 	/* Clean up remaining pages from the original wdata */
2608 	if (iov_iter_is_xarray(&wdata->iter))
2609 		cifs_pages_write_failed(inode, fpos, rest_len);
2610 
2611 	if (rc != 0 && !is_retryable_error(rc))
2612 		mapping_set_error(inode->i_mapping, rc);
2613 	kref_put(&wdata->refcount, cifs_writedata_release);
2614 }
2615 
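/*
 * Completion work for an async write: update the cached file size and stats
 * on success, requeue on a retryable failure during data-integrity writeback,
 * and otherwise mark the affected pages written back, redirtied or failed.
 */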
2616 void
2617 cifs_writev_complete(struct work_struct *work)
2618 {
2619 	struct cifs_writedata *wdata = container_of(work,
2620 						struct cifs_writedata, work);
2621 	struct inode *inode = d_inode(wdata->cfile->dentry);
2622 
2623 	if (wdata->result == 0) {
2624 		spin_lock(&inode->i_lock);
2625 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2626 		spin_unlock(&inode->i_lock);
2627 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2628 					 wdata->bytes);
2629 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2630 		return cifs_writev_requeue(wdata);
2631 
2632 	if (wdata->result == -EAGAIN)
2633 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2634 	else if (wdata->result < 0)
2635 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2636 	else
2637 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2638 
2639 	if (wdata->result != -EAGAIN)
2640 		mapping_set_error(inode->i_mapping, wdata->result);
2641 	kref_put(&wdata->refcount, cifs_writedata_release);
2642 }
2643 
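/*
 * Allocate and initialise a write request descriptor with the given
 * completion work function.
 */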
2644 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2645 {
2646 	struct cifs_writedata *wdata;
2647 
2648 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2649 	if (wdata != NULL) {
2650 		kref_init(&wdata->refcount);
2651 		INIT_LIST_HEAD(&wdata->list);
2652 		init_completion(&wdata->done);
2653 		INIT_WORK(&wdata->work, complete);
2654 	}
2655 	return wdata;
2656 }
2657 
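/*
 * Synchronously write part of a single page back to the server using any
 * available writable handle for the inode.
 */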
2658 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2659 {
2660 	struct address_space *mapping = page->mapping;
2661 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2662 	char *write_data;
2663 	int rc = -EFAULT;
2664 	int bytes_written = 0;
2665 	struct inode *inode;
2666 	struct cifsFileInfo *open_file;
2667 
2668 	if (!mapping || !mapping->host)
2669 		return -EFAULT;
2670 
2671 	inode = page->mapping->host;
2672 
2673 	offset += (loff_t)from;
2674 	write_data = kmap(page);
2675 	write_data += from;
2676 
2677 	if ((to > PAGE_SIZE) || (from > to)) {
2678 		kunmap(page);
2679 		return -EIO;
2680 	}
2681 
2682 	/* racing with truncate? */
2683 	if (offset > mapping->host->i_size) {
2684 		kunmap(page);
2685 		return 0; /* don't care */
2686 	}
2687 
2688 	/* check to make sure that we are not extending the file */
2689 	if (mapping->host->i_size - offset < (loff_t)to)
2690 		to = (unsigned)(mapping->host->i_size - offset);
2691 
2692 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2693 				    &open_file);
2694 	if (!rc) {
2695 		bytes_written = cifs_write(open_file, open_file->pid,
2696 					   write_data, to - from, &offset);
2697 		cifsFileInfo_put(open_file);
2698 		/* Does mm or vfs already set times? */
2699 		simple_inode_init_ts(inode);
2700 		if ((bytes_written > 0) && (offset))
2701 			rc = 0;
2702 		else if (bytes_written < 0)
2703 			rc = bytes_written;
2704 		else
2705 			rc = -EFAULT;
2706 	} else {
2707 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2708 		if (!is_retryable_error(rc))
2709 			rc = -EIO;
2710 	}
2711 
2712 	kunmap(page);
2713 	return rc;
2714 }
2715 
2716 /*
2717  * Extend the region to be written back to include subsequent contiguously
2718  * dirty pages if possible, but don't sleep while doing so.
2719  */
2720 static void cifs_extend_writeback(struct address_space *mapping,
2721 				  struct xa_state *xas,
2722 				  long *_count,
2723 				  loff_t start,
2724 				  int max_pages,
2725 				  loff_t max_len,
2726 				  size_t *_len)
2727 {
2728 	struct folio_batch batch;
2729 	struct folio *folio;
2730 	unsigned int nr_pages;
2731 	pgoff_t index = (start + *_len) / PAGE_SIZE;
2732 	size_t len;
2733 	bool stop = true;
2734 	unsigned int i;
2735 
2736 	folio_batch_init(&batch);
2737 
2738 	do {
2739 		/* Firstly, we gather up a batch of contiguous dirty pages
2740 		 * under the RCU read lock - but we can't clear the dirty flags
2741 		 * there if any of those pages are mapped.
2742 		 */
2743 		rcu_read_lock();
2744 
2745 		xas_for_each(xas, folio, ULONG_MAX) {
2746 			stop = true;
2747 			if (xas_retry(xas, folio))
2748 				continue;
2749 			if (xa_is_value(folio))
2750 				break;
2751 			if (folio->index != index) {
2752 				xas_reset(xas);
2753 				break;
2754 			}
2755 
2756 			if (!folio_try_get_rcu(folio)) {
2757 				xas_reset(xas);
2758 				continue;
2759 			}
2760 			nr_pages = folio_nr_pages(folio);
2761 			if (nr_pages > max_pages) {
2762 				xas_reset(xas);
2763 				break;
2764 			}
2765 
2766 			/* Has the page moved or been split? */
2767 			if (unlikely(folio != xas_reload(xas))) {
2768 				folio_put(folio);
2769 				xas_reset(xas);
2770 				break;
2771 			}
2772 
2773 			if (!folio_trylock(folio)) {
2774 				folio_put(folio);
2775 				xas_reset(xas);
2776 				break;
2777 			}
2778 			if (!folio_test_dirty(folio) ||
2779 			    folio_test_writeback(folio)) {
2780 				folio_unlock(folio);
2781 				folio_put(folio);
2782 				xas_reset(xas);
2783 				break;
2784 			}
2785 
2786 			max_pages -= nr_pages;
2787 			len = folio_size(folio);
2788 			stop = false;
2789 
2790 			index += nr_pages;
2791 			*_count -= nr_pages;
2792 			*_len += len;
2793 			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2794 				stop = true;
2795 
2796 			if (!folio_batch_add(&batch, folio))
2797 				break;
2798 			if (stop)
2799 				break;
2800 		}
2801 
2802 		xas_pause(xas);
2803 		rcu_read_unlock();
2804 
2805 		/* Now, if we obtained any pages, we can clear their dirty flags
2806 		 * and mark them for writeback.
2807 		 */
2808 		if (!folio_batch_count(&batch))
2809 			break;
2810 
2811 		for (i = 0; i < folio_batch_count(&batch); i++) {
2812 			folio = batch.folios[i];
2813 			/* The folio should be locked, dirty and not undergoing
2814 			 * writeback from the loop above.
2815 			 */
2816 			if (!folio_clear_dirty_for_io(folio))
2817 				WARN_ON(1);
2818 			folio_start_writeback(folio);
2819 			folio_unlock(folio);
2820 		}
2821 
2822 		folio_batch_release(&batch);
2823 		cond_resched();
2824 	} while (!stop);
2825 }
2826 
2827 /*
2828  * Write back the locked page and any subsequent non-locked dirty pages.
2829  */
2830 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2831 						 struct writeback_control *wbc,
2832 						 struct xa_state *xas,
2833 						 struct folio *folio,
2834 						 unsigned long long start,
2835 						 unsigned long long end)
2836 {
2837 	struct inode *inode = mapping->host;
2838 	struct TCP_Server_Info *server;
2839 	struct cifs_writedata *wdata;
2840 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2841 	struct cifs_credits credits_on_stack;
2842 	struct cifs_credits *credits = &credits_on_stack;
2843 	struct cifsFileInfo *cfile = NULL;
2844 	unsigned long long i_size = i_size_read(inode), max_len;
2845 	unsigned int xid, wsize;
2846 	size_t len = folio_size(folio);
2847 	long count = wbc->nr_to_write;
2848 	int rc;
2849 
2850 	/* The folio should be locked, dirty and not undergoing writeback. */
2851 	if (!folio_clear_dirty_for_io(folio))
2852 		WARN_ON_ONCE(1);
2853 	folio_start_writeback(folio);
2854 
2855 	count -= folio_nr_pages(folio);
2856 
2857 	xid = get_xid();
2858 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2859 
2860 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2861 	if (rc) {
2862 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2863 		goto err_xid;
2864 	}
2865 
2866 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2867 					   &wsize, credits);
2868 	if (rc != 0)
2869 		goto err_close;
2870 
2871 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2872 	if (!wdata) {
2873 		rc = -ENOMEM;
2874 		goto err_uncredit;
2875 	}
2876 
2877 	wdata->sync_mode = wbc->sync_mode;
2878 	wdata->offset = folio_pos(folio);
2879 	wdata->pid = cfile->pid;
2880 	wdata->credits = credits_on_stack;
2881 	wdata->cfile = cfile;
2882 	wdata->server = server;
2883 	cfile = NULL;
2884 
2885 	/* Find all consecutive lockable dirty pages that have contiguous
2886 	 * written regions, stopping when we find a page that is not
2887 	 * immediately lockable, is not dirty or is missing, or we reach the
2888 	 * end of the range.
2889 	 */
2890 	if (start < i_size) {
2891 		/* Trim the write to the EOF; the extra data is ignored.  Also
2892 		 * put an upper limit on the size of a single storedata op.
2893 		 */
2894 		max_len = wsize;
2895 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2896 		max_len = min_t(unsigned long long, max_len, i_size - start);
2897 
2898 		if (len < max_len) {
2899 			int max_pages = INT_MAX;
2900 
2901 #ifdef CONFIG_CIFS_SMB_DIRECT
2902 			if (server->smbd_conn)
2903 				max_pages = server->smbd_conn->max_frmr_depth;
2904 #endif
2905 			max_pages -= folio_nr_pages(folio);
2906 
2907 			if (max_pages > 0)
2908 				cifs_extend_writeback(mapping, xas, &count, start,
2909 						      max_pages, max_len, &len);
2910 		}
2911 	}
2912 	len = min_t(unsigned long long, len, i_size - start);
2913 
2914 	/* We now have a contiguous set of dirty pages, each with writeback
2915 	 * set; the first page is still locked at this point, but all the rest
2916 	 * have been unlocked.
2917 	 */
2918 	folio_unlock(folio);
2919 	wdata->bytes = len;
2920 
2921 	if (start < i_size) {
2922 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2923 				start, len);
2924 
2925 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2926 		if (rc)
2927 			goto err_wdata;
2928 
2929 		if (wdata->cfile->invalidHandle)
2930 			rc = -EAGAIN;
2931 		else
2932 			rc = wdata->server->ops->async_writev(wdata,
2933 							      cifs_writedata_release);
2934 		if (rc >= 0) {
2935 			kref_put(&wdata->refcount, cifs_writedata_release);
2936 			goto err_close;
2937 		}
2938 	} else {
2939 		/* The dirty region was entirely beyond the EOF. */
2940 		cifs_pages_written_back(inode, start, len);
2941 		rc = 0;
2942 	}
2943 
2944 err_wdata:
2945 	kref_put(&wdata->refcount, cifs_writedata_release);
2946 err_uncredit:
2947 	add_credits_and_wake_if(server, credits, 0);
2948 err_close:
2949 	if (cfile)
2950 		cifsFileInfo_put(cfile);
2951 err_xid:
2952 	free_xid(xid);
2953 	if (rc == 0) {
2954 		wbc->nr_to_write = count;
2955 		rc = len;
2956 	} else if (is_retryable_error(rc)) {
2957 		cifs_pages_write_redirty(inode, start, len);
2958 	} else {
2959 		cifs_pages_write_failed(inode, start, len);
2960 		mapping_set_error(mapping, rc);
2961 	}
2962 	/* Indication to update ctime and mtime as close is deferred */
2963 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2964 	return rc;
2965 }
2966 
2967 /*
2968  * write a region of pages back to the server
2969  */
2970 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2971 				     struct writeback_control *wbc,
2972 				     struct xa_state *xas,
2973 				     unsigned long long *_start,
2974 				     unsigned long long end)
2975 {
2976 	struct folio *folio;
2977 	unsigned long long start = *_start;
2978 	ssize_t ret;
2979 	int skips = 0;
2980 
2981 search_again:
2982 	/* Find the first dirty page. */
2983 	rcu_read_lock();
2984 
2985 	for (;;) {
2986 		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2987 		if (xas_retry(xas, folio) || xa_is_value(folio))
2988 			continue;
2989 		if (!folio)
2990 			break;
2991 
2992 		if (!folio_try_get_rcu(folio)) {
2993 			xas_reset(xas);
2994 			continue;
2995 		}
2996 
2997 		if (unlikely(folio != xas_reload(xas))) {
2998 			folio_put(folio);
2999 			xas_reset(xas);
3000 			continue;
3001 		}
3002 
3003 		xas_pause(xas);
3004 		break;
3005 	}
3006 	rcu_read_unlock();
3007 	if (!folio)
3008 		return 0;
3009 
3010 	start = folio_pos(folio); /* May regress with THPs */
3011 
3012 	/* At this point we hold neither the i_pages lock nor the page lock:
3013 	 * the page may be truncated or invalidated (changing page->mapping to
3014 	 * NULL), or even swizzled back from swapper_space to tmpfs file
3015 	 * mapping
3016 	 */
3017 lock_again:
3018 	if (wbc->sync_mode != WB_SYNC_NONE) {
3019 		ret = folio_lock_killable(folio);
3020 		if (ret < 0)
3021 			return ret;
3022 	} else {
3023 		if (!folio_trylock(folio))
3024 			goto search_again;
3025 	}
3026 
3027 	if (folio->mapping != mapping ||
3028 	    !folio_test_dirty(folio)) {
3029 		start += folio_size(folio);
3030 		folio_unlock(folio);
3031 		goto search_again;
3032 	}
3033 
3034 	if (folio_test_writeback(folio) ||
3035 	    folio_test_fscache(folio)) {
3036 		folio_unlock(folio);
3037 		if (wbc->sync_mode != WB_SYNC_NONE) {
3038 			folio_wait_writeback(folio);
3039 #ifdef CONFIG_CIFS_FSCACHE
3040 			folio_wait_fscache(folio);
3041 #endif
3042 			goto lock_again;
3043 		}
3044 
3045 		start += folio_size(folio);
3046 		if (wbc->sync_mode == WB_SYNC_NONE) {
3047 			if (skips >= 5 || need_resched()) {
3048 				ret = 0;
3049 				goto out;
3050 			}
3051 			skips++;
3052 		}
3053 		goto search_again;
3054 	}
3055 
3056 	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3057 out:
3058 	if (ret > 0)
3059 		*_start = start + ret;
3060 	return ret;
3061 }
3062 
3063 /*
3064  * Write a region of pages back to the server
3065  */
3066 static int cifs_writepages_region(struct address_space *mapping,
3067 				  struct writeback_control *wbc,
3068 				  unsigned long long *_start,
3069 				  unsigned long long end)
3070 {
3071 	ssize_t ret;
3072 
3073 	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3074 
3075 	do {
3076 		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3077 		if (ret > 0 && wbc->nr_to_write > 0)
3078 			cond_resched();
3079 	} while (ret > 0 && wbc->nr_to_write > 0);
3080 
3081 	return ret > 0 ? 0 : ret;
3082 }
3083 
3084 /*
3085  * Write some of the pending data back to the server
3086  */
3087 static int cifs_writepages(struct address_space *mapping,
3088 			   struct writeback_control *wbc)
3089 {
3090 	loff_t start, end;
3091 	int ret;
3092 
3093 	/* We have to be careful as we can end up racing with setattr()
3094 	 * truncating the pagecache since the caller doesn't take a lock here
3095 	 * to prevent it.
3096 	 */
3097 
3098 	if (wbc->range_cyclic && mapping->writeback_index) {
3099 		start = mapping->writeback_index * PAGE_SIZE;
3100 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3101 		if (ret < 0)
3102 			goto out;
3103 
3104 		if (wbc->nr_to_write <= 0) {
3105 			mapping->writeback_index = start / PAGE_SIZE;
3106 			goto out;
3107 		}
3108 
3109 		start = 0;
3110 		end = mapping->writeback_index * PAGE_SIZE;
3111 		mapping->writeback_index = 0;
3112 		ret = cifs_writepages_region(mapping, wbc, &start, end);
3113 		if (ret == 0)
3114 			mapping->writeback_index = start / PAGE_SIZE;
3115 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3116 		start = 0;
3117 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3118 		if (wbc->nr_to_write > 0 && ret == 0)
3119 			mapping->writeback_index = start / PAGE_SIZE;
3120 	} else {
3121 		start = wbc->range_start;
3122 		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3123 	}
3124 
3125 out:
3126 	return ret;
3127 }
3128 
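/*
 * Write a single locked page back to the server, retrying on -EAGAIN when
 * doing data-integrity (WB_SYNC_ALL) writeback.
 */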
3129 static int
3130 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3131 {
3132 	int rc;
3133 	unsigned int xid;
3134 
3135 	xid = get_xid();
3136 /* BB add check for wbc flags */
3137 	get_page(page);
3138 	if (!PageUptodate(page))
3139 		cifs_dbg(FYI, "ppw - page not up to date\n");
3140 
3141 	/*
3142 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3143 	 *
3144 	 * A writepage() implementation always needs to do either this,
3145 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3146 	 * the case of a failure.
3147 	 *
3148 	 * Just unlocking the page will cause the radix tree tag-bits
3149 	 * to fail to update with the state of the page correctly.
3150 	 */
3151 	set_page_writeback(page);
3152 retry_write:
3153 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3154 	if (is_retryable_error(rc)) {
3155 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3156 			goto retry_write;
3157 		redirty_page_for_writepage(wbc, page);
3158 	} else if (rc != 0) {
3159 		SetPageError(page);
3160 		mapping_set_error(page->mapping, rc);
3161 	} else {
3162 		SetPageUptodate(page);
3163 	}
3164 	end_page_writeback(page);
3165 	put_page(page);
3166 	free_xid(xid);
3167 	return rc;
3168 }
3169 
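/*
 * ->write_end(): either dirty the now up-to-date page for later writeback or,
 * for a page that is still not up to date, push the copied bytes to the
 * server synchronously; then extend i_size if the write grew the file.
 */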
3170 static int cifs_write_end(struct file *file, struct address_space *mapping,
3171 			loff_t pos, unsigned len, unsigned copied,
3172 			struct page *page, void *fsdata)
3173 {
3174 	int rc;
3175 	struct inode *inode = mapping->host;
3176 	struct cifsFileInfo *cfile = file->private_data;
3177 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3178 	struct folio *folio = page_folio(page);
3179 	__u32 pid;
3180 
3181 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3182 		pid = cfile->pid;
3183 	else
3184 		pid = current->tgid;
3185 
3186 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3187 		 page, pos, copied);
3188 
3189 	if (folio_test_checked(folio)) {
3190 		if (copied == len)
3191 			folio_mark_uptodate(folio);
3192 		folio_clear_checked(folio);
3193 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3194 		folio_mark_uptodate(folio);
3195 
3196 	if (!folio_test_uptodate(folio)) {
3197 		char *page_data;
3198 		unsigned offset = pos & (PAGE_SIZE - 1);
3199 		unsigned int xid;
3200 
3201 		xid = get_xid();
3202 		/* this is probably better than directly calling
3203 		   partialpage_write since in this function the file handle is
3204 		   known, which we might as well leverage */
3205 		/* BB check if anything else missing out of ppw
3206 		   such as updating last write time */
3207 		page_data = kmap(page);
3208 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3209 		/* if (rc < 0) should we set writebehind rc? */
3210 		kunmap(page);
3211 
3212 		free_xid(xid);
3213 	} else {
3214 		rc = copied;
3215 		pos += copied;
3216 		set_page_dirty(page);
3217 	}
3218 
3219 	if (rc > 0) {
3220 		spin_lock(&inode->i_lock);
3221 		if (pos > inode->i_size) {
3222 			loff_t additional_blocks = (512 - 1 + copied) >> 9;
3223 
3224 			i_size_write(inode, pos);
3225 			/*
3226 			 * Estimate new allocation size based on the amount written.
3227 			 * This will be updated from server on close (and on queryinfo)
3228 			 */
3229 			inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
3230 						inode->i_blocks + additional_blocks);
3231 		}
3232 		spin_unlock(&inode->i_lock);
3233 	}
3234 
3235 	unlock_page(page);
3236 	put_page(page);
3237 	/* Indication to update ctime and mtime as close is deferred */
3238 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3239 
3240 	return rc;
3241 }
3242 
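/*
 * Strict fsync: flush dirty pagecache, zap the mapping if we do not hold a
 * read lease, and ask the server to flush the file unless CIFS_MOUNT_NOSSYNC
 * is set, using a writable handle if this one was not opened for write.
 */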
3243 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3244 		      int datasync)
3245 {
3246 	unsigned int xid;
3247 	int rc = 0;
3248 	struct cifs_tcon *tcon;
3249 	struct TCP_Server_Info *server;
3250 	struct cifsFileInfo *smbfile = file->private_data;
3251 	struct inode *inode = file_inode(file);
3252 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3253 
3254 	rc = file_write_and_wait_range(file, start, end);
3255 	if (rc) {
3256 		trace_cifs_fsync_err(inode->i_ino, rc);
3257 		return rc;
3258 	}
3259 
3260 	xid = get_xid();
3261 
3262 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3263 		 file, datasync);
3264 
3265 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3266 		rc = cifs_zap_mapping(inode);
3267 		if (rc) {
3268 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3269 			rc = 0; /* don't care about it in fsync */
3270 		}
3271 	}
3272 
3273 	tcon = tlink_tcon(smbfile->tlink);
3274 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3275 		server = tcon->ses->server;
3276 		if (server->ops->flush == NULL) {
3277 			rc = -ENOSYS;
3278 			goto strict_fsync_exit;
3279 		}
3280 
3281 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3282 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3283 			if (smbfile) {
3284 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3285 				cifsFileInfo_put(smbfile);
3286 			} else
3287 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3288 		} else
3289 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3290 	}
3291 
3292 strict_fsync_exit:
3293 	free_xid(xid);
3294 	return rc;
3295 }
3296 
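/*
 * Non-strict fsync: flush dirty pagecache and ask the server to flush the
 * file, but do not invalidate the local mapping.
 */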
3297 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3298 {
3299 	unsigned int xid;
3300 	int rc = 0;
3301 	struct cifs_tcon *tcon;
3302 	struct TCP_Server_Info *server;
3303 	struct cifsFileInfo *smbfile = file->private_data;
3304 	struct inode *inode = file_inode(file);
3305 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3306 
3307 	rc = file_write_and_wait_range(file, start, end);
3308 	if (rc) {
3309 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3310 		return rc;
3311 	}
3312 
3313 	xid = get_xid();
3314 
3315 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3316 		 file, datasync);
3317 
3318 	tcon = tlink_tcon(smbfile->tlink);
3319 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3320 		server = tcon->ses->server;
3321 		if (server->ops->flush == NULL) {
3322 			rc = -ENOSYS;
3323 			goto fsync_exit;
3324 		}
3325 
3326 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3327 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3328 			if (smbfile) {
3329 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3330 				cifsFileInfo_put(smbfile);
3331 			} else
3332 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3333 		} else
3334 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3335 	}
3336 
3337 fsync_exit:
3338 	free_xid(xid);
3339 	return rc;
3340 }
3341 
3342 /*
3343  * As the file closes, flush all cached write data for this inode, checking
3344  * for write-behind errors.
3345  */
3346 int cifs_flush(struct file *file, fl_owner_t id)
3347 {
3348 	struct inode *inode = file_inode(file);
3349 	int rc = 0;
3350 
3351 	if (file->f_mode & FMODE_WRITE)
3352 		rc = filemap_write_and_wait(inode->i_mapping);
3353 
3354 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3355 	if (rc) {
3356 		/* get more nuanced writeback errors */
3357 		rc = filemap_check_wb_err(file->f_mapping, 0);
3358 		trace_cifs_flush_err(inode->i_ino, rc);
3359 	}
3360 	return rc;
3361 }
3362 
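/*
 * Release a wdata used for an uncached write, dropping its reference on the
 * owning AIO context as well.
 */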
3363 static void
3364 cifs_uncached_writedata_release(struct kref *refcount)
3365 {
3366 	struct cifs_writedata *wdata = container_of(refcount,
3367 					struct cifs_writedata, refcount);
3368 
3369 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3370 	cifs_writedata_release(refcount);
3371 }
3372 
3373 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3374 
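/*
 * Completion work for an uncached async write: update the cached file size,
 * signal completion and collect the results for the owning AIO context.
 */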
3375 static void
3376 cifs_uncached_writev_complete(struct work_struct *work)
3377 {
3378 	struct cifs_writedata *wdata = container_of(work,
3379 					struct cifs_writedata, work);
3380 	struct inode *inode = d_inode(wdata->cfile->dentry);
3381 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3382 
3383 	spin_lock(&inode->i_lock);
3384 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3385 	if (cifsi->netfs.remote_i_size > inode->i_size)
3386 		i_size_write(inode, cifsi->netfs.remote_i_size);
3387 	spin_unlock(&inode->i_lock);
3388 
3389 	complete(&wdata->done);
3390 	collect_uncached_write_data(wdata->ctx);
3391 	/* the below call can possibly free the last ref to aio ctx */
3392 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3393 }
3394 
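/*
 * Resend a failed uncached write as a single request, waiting until enough
 * credits are available to cover the whole wdata.
 */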
3395 static int
3396 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3397 	struct cifs_aio_ctx *ctx)
3398 {
3399 	unsigned int wsize;
3400 	struct cifs_credits credits;
3401 	int rc;
3402 	struct TCP_Server_Info *server = wdata->server;
3403 
3404 	do {
3405 		if (wdata->cfile->invalidHandle) {
3406 			rc = cifs_reopen_file(wdata->cfile, false);
3407 			if (rc == -EAGAIN)
3408 				continue;
3409 			else if (rc)
3410 				break;
3411 		}
3412 
3414 		/*
3415 		 * Wait for credits to resend this wdata.
3416 		 * Note: we are attempting to resend the whole wdata, not in
3417 		 * segments.
3418 		 */
3419 		do {
3420 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3421 						&wsize, &credits);
3422 			if (rc)
3423 				goto fail;
3424 
3425 			if (wsize < wdata->bytes) {
3426 				add_credits_and_wake_if(server, &credits, 0);
3427 				msleep(1000);
3428 			}
3429 		} while (wsize < wdata->bytes);
3430 		wdata->credits = credits;
3431 
3432 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3433 
3434 		if (!rc) {
3435 			if (wdata->cfile->invalidHandle)
3436 				rc = -EAGAIN;
3437 			else {
3438 				wdata->replay = true;
3439 #ifdef CONFIG_CIFS_SMB_DIRECT
3440 				if (wdata->mr) {
3441 					wdata->mr->need_invalidate = true;
3442 					smbd_deregister_mr(wdata->mr);
3443 					wdata->mr = NULL;
3444 				}
3445 #endif
3446 				rc = server->ops->async_writev(wdata,
3447 					cifs_uncached_writedata_release);
3448 			}
3449 		}
3450 
3451 		/* If the write was successfully sent, we are done */
3452 		if (!rc) {
3453 			list_add_tail(&wdata->list, wdata_list);
3454 			return 0;
3455 		}
3456 
3457 		/* Roll back credits and retry if needed */
3458 		add_credits_and_wake_if(server, &wdata->credits, 0);
3459 	} while (rc == -EAGAIN);
3460 
3461 fail:
3462 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3463 	return rc;
3464 }
3465 
3466 /*
3467  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3468  * size and maximum number of segments.
3469  */
3470 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3471 				     size_t max_segs, unsigned int *_nsegs)
3472 {
3473 	const struct bio_vec *bvecs = iter->bvec;
3474 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3475 	size_t len, span = 0, n = iter->count;
3476 	size_t skip = iter->iov_offset;
3477 
3478 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3479 		return 0;
3480 
3481 	while (n && ix < nbv && skip) {
3482 		len = bvecs[ix].bv_len;
3483 		if (skip < len)
3484 			break;
3485 		skip -= len;
3486 		n -= len;
3487 		ix++;
3488 	}
3489 
3490 	while (n && ix < nbv) {
3491 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3492 		span += len;
3493 		max_size -= len;
3494 		nsegs++;
3495 		ix++;
3496 		if (max_size == 0 || nsegs >= max_segs)
3497 			break;
3498 		skip = 0;
3499 		n -= len;
3500 	}
3501 
3502 	*_nsegs = nsegs;
3503 	return span;
3504 }
3505 
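/*
 * Split the data in the source iterator into wsize- and segment-limited
 * chunks and issue an async write for each, adding the resulting wdata
 * structures to wdata_list for the caller to collect.
 */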
3506 static int
3507 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3508 		     struct cifsFileInfo *open_file,
3509 		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3510 		     struct cifs_aio_ctx *ctx)
3511 {
3512 	int rc = 0;
3513 	size_t cur_len, max_len;
3514 	struct cifs_writedata *wdata;
3515 	pid_t pid;
3516 	struct TCP_Server_Info *server;
3517 	unsigned int xid, max_segs = INT_MAX;
3518 
3519 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3520 		pid = open_file->pid;
3521 	else
3522 		pid = current->tgid;
3523 
3524 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3525 	xid = get_xid();
3526 
3527 #ifdef CONFIG_CIFS_SMB_DIRECT
3528 	if (server->smbd_conn)
3529 		max_segs = server->smbd_conn->max_frmr_depth;
3530 #endif
3531 
3532 	do {
3533 		struct cifs_credits credits_on_stack;
3534 		struct cifs_credits *credits = &credits_on_stack;
3535 		unsigned int wsize, nsegs = 0;
3536 
3537 		if (signal_pending(current)) {
3538 			rc = -EINTR;
3539 			break;
3540 		}
3541 
3542 		if (open_file->invalidHandle) {
3543 			rc = cifs_reopen_file(open_file, false);
3544 			if (rc == -EAGAIN)
3545 				continue;
3546 			else if (rc)
3547 				break;
3548 		}
3549 
3550 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3551 						   &wsize, credits);
3552 		if (rc)
3553 			break;
3554 
3555 		max_len = min_t(const size_t, len, wsize);
3556 		if (!max_len) {
3557 			rc = -EAGAIN;
3558 			add_credits_and_wake_if(server, credits, 0);
3559 			break;
3560 		}
3561 
3562 		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3563 		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3564 			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3565 		if (cur_len == 0) {
3566 			rc = -EIO;
3567 			add_credits_and_wake_if(server, credits, 0);
3568 			break;
3569 		}
3570 
3571 		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3572 		if (!wdata) {
3573 			rc = -ENOMEM;
3574 			add_credits_and_wake_if(server, credits, 0);
3575 			break;
3576 		}
3577 
3578 		wdata->sync_mode = WB_SYNC_ALL;
3579 		wdata->offset	= (__u64)fpos;
3580 		wdata->cfile	= cifsFileInfo_get(open_file);
3581 		wdata->server	= server;
3582 		wdata->pid	= pid;
3583 		wdata->bytes	= cur_len;
3584 		wdata->credits	= credits_on_stack;
3585 		wdata->iter	= *from;
3586 		wdata->ctx	= ctx;
3587 		kref_get(&ctx->refcount);
3588 
3589 		iov_iter_truncate(&wdata->iter, cur_len);
3590 
3591 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3592 
3593 		if (!rc) {
3594 			if (wdata->cfile->invalidHandle)
3595 				rc = -EAGAIN;
3596 			else
3597 				rc = server->ops->async_writev(wdata,
3598 					cifs_uncached_writedata_release);
3599 		}
3600 
3601 		if (rc) {
3602 			add_credits_and_wake_if(server, &wdata->credits, 0);
3603 			kref_put(&wdata->refcount,
3604 				 cifs_uncached_writedata_release);
3605 			if (rc == -EAGAIN)
3606 				continue;
3607 			break;
3608 		}
3609 
3610 		list_add_tail(&wdata->list, wdata_list);
3611 		iov_iter_advance(from, cur_len);
3612 		fpos += cur_len;
3613 		len -= cur_len;
3614 	} while (len > 0);
3615 
3616 	free_xid(xid);
3617 	return rc;
3618 }
3619 
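/*
 * Collect the results of all outstanding uncached writes for an AIO context,
 * resending any that failed with a retryable error, then complete the iocb.
 */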
3620 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3621 {
3622 	struct cifs_writedata *wdata, *tmp;
3623 	struct cifs_tcon *tcon;
3624 	struct cifs_sb_info *cifs_sb;
3625 	struct dentry *dentry = ctx->cfile->dentry;
3626 	ssize_t rc;
3627 
3628 	tcon = tlink_tcon(ctx->cfile->tlink);
3629 	cifs_sb = CIFS_SB(dentry->d_sb);
3630 
3631 	mutex_lock(&ctx->aio_mutex);
3632 
3633 	if (list_empty(&ctx->list)) {
3634 		mutex_unlock(&ctx->aio_mutex);
3635 		return;
3636 	}
3637 
3638 	rc = ctx->rc;
3639 	/*
3640 	 * Wait for and collect replies for any successful sends in order of
3641 	 * increasing offset. Once an error is hit, then return without waiting
3642 	 * for any more replies.
3643 	 */
3644 restart_loop:
3645 	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3646 		if (!rc) {
3647 			if (!try_wait_for_completion(&wdata->done)) {
3648 				mutex_unlock(&ctx->aio_mutex);
3649 				return;
3650 			}
3651 
3652 			if (wdata->result)
3653 				rc = wdata->result;
3654 			else
3655 				ctx->total_len += wdata->bytes;
3656 
3657 			/* resend call if it's a retryable error */
3658 			if (rc == -EAGAIN) {
3659 				struct list_head tmp_list;
3660 				struct iov_iter tmp_from = ctx->iter;
3661 
3662 				INIT_LIST_HEAD(&tmp_list);
3663 				list_del_init(&wdata->list);
3664 
3665 				if (ctx->direct_io)
3666 					rc = cifs_resend_wdata(
3667 						wdata, &tmp_list, ctx);
3668 				else {
3669 					iov_iter_advance(&tmp_from,
3670 						 wdata->offset - ctx->pos);
3671 
3672 					rc = cifs_write_from_iter(wdata->offset,
3673 						wdata->bytes, &tmp_from,
3674 						ctx->cfile, cifs_sb, &tmp_list,
3675 						ctx);
3676 
3677 					kref_put(&wdata->refcount,
3678 						cifs_uncached_writedata_release);
3679 				}
3680 
3681 				list_splice(&tmp_list, &ctx->list);
3682 				goto restart_loop;
3683 			}
3684 		}
3685 		list_del_init(&wdata->list);
3686 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3687 	}
3688 
3689 	cifs_stats_bytes_written(tcon, ctx->total_len);
3690 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3691 
3692 	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3693 
3694 	mutex_unlock(&ctx->aio_mutex);
3695 
3696 	if (ctx->iocb && ctx->iocb->ki_complete)
3697 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3698 	else
3699 		complete(&ctx->done);
3700 }
3701 
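/*
 * Common helper for the uncached and direct write paths.  The source
 * iterator is captured in an aio context (extracting or duplicating it if it
 * cannot be used from a worker thread), async writes are queued with
 * cifs_write_from_iter(), and the result is either -EIOCBQUEUED for async
 * kiocbs or the outcome collected by collect_uncached_write_data().
 */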
3702 static ssize_t __cifs_writev(
3703 	struct kiocb *iocb, struct iov_iter *from, bool direct)
3704 {
3705 	struct file *file = iocb->ki_filp;
3706 	ssize_t total_written = 0;
3707 	struct cifsFileInfo *cfile;
3708 	struct cifs_tcon *tcon;
3709 	struct cifs_sb_info *cifs_sb;
3710 	struct cifs_aio_ctx *ctx;
3711 	int rc;
3712 
3713 	rc = generic_write_checks(iocb, from);
3714 	if (rc <= 0)
3715 		return rc;
3716 
3717 	cifs_sb = CIFS_FILE_SB(file);
3718 	cfile = file->private_data;
3719 	tcon = tlink_tcon(cfile->tlink);
3720 
3721 	if (!tcon->ses->server->ops->async_writev)
3722 		return -ENOSYS;
3723 
3724 	ctx = cifs_aio_ctx_alloc();
3725 	if (!ctx)
3726 		return -ENOMEM;
3727 
3728 	ctx->cfile = cifsFileInfo_get(cfile);
3729 
3730 	if (!is_sync_kiocb(iocb))
3731 		ctx->iocb = iocb;
3732 
3733 	ctx->pos = iocb->ki_pos;
3734 	ctx->direct_io = direct;
3735 	ctx->nr_pinned_pages = 0;
3736 
3737 	if (user_backed_iter(from)) {
3738 		/*
3739 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3740 		 * they contain references to the calling process's virtual
3741 		 * memory layout which won't be available in an async worker
3742 		 * thread.  This also takes a pin on every folio involved.
3743 		 */
3744 		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3745 					     &ctx->iter, 0);
3746 		if (rc < 0) {
3747 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3748 			return rc;
3749 		}
3750 
3751 		ctx->nr_pinned_pages = rc;
3752 		ctx->bv = (void *)ctx->iter.bvec;
3753 		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3754 	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3755 		   !is_sync_kiocb(iocb)) {
3756 		/*
3757 		 * If the op is asynchronous, we need to copy the list attached
3758 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3759 		 * will be pinned by the caller; in any case, we may or may not
3760 		 * be able to pin the pages, so we don't try.
3761 		 */
3762 		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3763 		if (!ctx->bv) {
3764 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3765 			return -ENOMEM;
3766 		}
3767 	} else {
3768 		/*
3769 		 * Otherwise, we just pass the iterator down as-is and rely on
3770 		 * the caller to make sure the pages referred to by the
3771 		 * iterator don't evaporate.
3772 		 */
3773 		ctx->iter = *from;
3774 	}
3775 
3776 	ctx->len = iov_iter_count(&ctx->iter);
3777 
3778 	/* grab the mutex here because write response handlers can access ctx */
3779 	mutex_lock(&ctx->aio_mutex);
3780 
3781 	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3782 				  cfile, cifs_sb, &ctx->list, ctx);
3783 
3784 	/*
3785 	 * If at least one write was successfully sent, then discard any rc
3786 	 * value from the later writes. If the other writes succeed, then
3787 	 * we'll end up returning whatever was written. If they fail, then
3788 	 * we'll get a new rc value from them.
3789 	 */
3790 	if (!list_empty(&ctx->list))
3791 		rc = 0;
3792 
3793 	mutex_unlock(&ctx->aio_mutex);
3794 
3795 	if (rc) {
3796 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3797 		return rc;
3798 	}
3799 
3800 	if (!is_sync_kiocb(iocb)) {
3801 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3802 		return -EIOCBQUEUED;
3803 	}
3804 
3805 	rc = wait_for_completion_killable(&ctx->done);
3806 	if (rc) {
3807 		mutex_lock(&ctx->aio_mutex);
3808 		ctx->rc = rc = -EINTR;
3809 		total_written = ctx->total_len;
3810 		mutex_unlock(&ctx->aio_mutex);
3811 	} else {
3812 		rc = ctx->rc;
3813 		total_written = ctx->total_len;
3814 	}
3815 
3816 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3817 
3818 	if (unlikely(!total_written))
3819 		return rc;
3820 
3821 	iocb->ki_pos += total_written;
3822 	return total_written;
3823 }
3824 
3825 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3826 {
3827 	struct file *file = iocb->ki_filp;
3828 
3829 	cifs_revalidate_mapping(file->f_inode);
3830 	return __cifs_writev(iocb, from, true);
3831 }
3832 
3833 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3834 {
3835 	return __cifs_writev(iocb, from, false);
3836 }
3837 
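/*
 * Cached write path used when we hold a write oplock: take lock_sem to make
 * sure no conflicting mandatory byte-range lock is set on the target region
 * while the write goes through the page cache.
 */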
3838 static ssize_t
3839 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3840 {
3841 	struct file *file = iocb->ki_filp;
3842 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3843 	struct inode *inode = file->f_mapping->host;
3844 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3845 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3846 	ssize_t rc;
3847 
3848 	inode_lock(inode);
3849 	/*
3850 	 * We need to hold the sem to be sure nobody modifies the lock list
3851 	 * with a brlock that prevents writing.
3852 	 */
3853 	down_read(&cinode->lock_sem);
3854 
3855 	rc = generic_write_checks(iocb, from);
3856 	if (rc <= 0)
3857 		goto out;
3858 
3859 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3860 				     server->vals->exclusive_lock_type, 0,
3861 				     NULL, CIFS_WRITE_OP))
3862 		rc = __generic_file_write_iter(iocb, from);
3863 	else
3864 		rc = -EACCES;
3865 out:
3866 	up_read(&cinode->lock_sem);
3867 	inode_unlock(inode);
3868 
3869 	if (rc > 0)
3870 		rc = generic_write_sync(iocb, rc);
3871 	return rc;
3872 }
3873 
3874 ssize_t
3875 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3876 {
3877 	struct inode *inode = file_inode(iocb->ki_filp);
3878 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3879 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3880 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3881 						iocb->ki_filp->private_data;
3882 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3883 	ssize_t written;
3884 
3885 	written = cifs_get_writer(cinode);
3886 	if (written)
3887 		return written;
3888 
3889 	if (CIFS_CACHE_WRITE(cinode)) {
3890 		if (cap_unix(tcon->ses) &&
3891 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3892 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3893 			written = generic_file_write_iter(iocb, from);
3894 			goto out;
3895 		}
3896 		written = cifs_writev(iocb, from);
3897 		goto out;
3898 	}
3899 	/*
3900 	 * For non-oplocked files in strict cache mode we need to write the data
3901 	 * to the server exactly from pos to pos+len-1 rather than flush all
3902 	 * affected pages, because flushing may cause an error with mandatory
3903 	 * locks on these pages but not on the region from pos to pos+len-1.
3904 	 */
3905 	written = cifs_user_writev(iocb, from);
3906 	if (CIFS_CACHE_READ(cinode)) {
3907 		/*
3908 		 * We have read level caching and we have just sent a write
3909 		 * request to the server thus making data in the cache stale.
3910 		 * Zap the cache and set oplock/lease level to NONE to avoid
3911 		 * reading stale data from the cache. All subsequent read
3912 		 * operations will read new data from the server.
3913 		 */
3914 		cifs_zap_mapping(inode);
3915 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3916 			 inode);
3917 		cinode->oplock = 0;
3918 	}
3919 out:
3920 	cifs_put_writer(cinode);
3921 	return written;
3922 }
3923 
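/*
 * Allocate a read descriptor and initialise its refcount, list linkage,
 * completion and work item.
 */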
3924 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3925 {
3926 	struct cifs_readdata *rdata;
3927 
3928 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3929 	if (rdata) {
3930 		kref_init(&rdata->refcount);
3931 		INIT_LIST_HEAD(&rdata->list);
3932 		init_completion(&rdata->done);
3933 		INIT_WORK(&rdata->work, complete);
3934 	}
3935 
3936 	return rdata;
3937 }
3938 
3939 void
3940 cifs_readdata_release(struct kref *refcount)
3941 {
3942 	struct cifs_readdata *rdata = container_of(refcount,
3943 					struct cifs_readdata, refcount);
3944 
3945 	if (rdata->ctx)
3946 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3947 #ifdef CONFIG_CIFS_SMB_DIRECT
3948 	if (rdata->mr) {
3949 		smbd_deregister_mr(rdata->mr);
3950 		rdata->mr = NULL;
3951 	}
3952 #endif
3953 	if (rdata->cfile)
3954 		cifsFileInfo_put(rdata->cfile);
3955 
3956 	kfree(rdata);
3957 }
3958 
3959 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3960 
3961 static void
3962 cifs_uncached_readv_complete(struct work_struct *work)
3963 {
3964 	struct cifs_readdata *rdata = container_of(work,
3965 						struct cifs_readdata, work);
3966 
3967 	complete(&rdata->done);
3968 	collect_uncached_read_data(rdata->ctx);
3969 	/* the below call can possibly free the last ref to aio ctx */
3970 	kref_put(&rdata->refcount, cifs_readdata_release);
3971 }
3972 
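/*
 * Resend a previously issued uncached read on the same channel, waiting
 * until the server grants enough credits to cover the whole rdata rather
 * than splitting it into smaller segments.
 */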
3973 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3974 			struct list_head *rdata_list,
3975 			struct cifs_aio_ctx *ctx)
3976 {
3977 	unsigned int rsize;
3978 	struct cifs_credits credits;
3979 	int rc;
3980 	struct TCP_Server_Info *server;
3981 
3982 	/* XXX: should we pick a new channel here? */
3983 	server = rdata->server;
3984 
3985 	do {
3986 		if (rdata->cfile->invalidHandle) {
3987 			rc = cifs_reopen_file(rdata->cfile, true);
3988 			if (rc == -EAGAIN)
3989 				continue;
3990 			else if (rc)
3991 				break;
3992 		}
3993 
3994 		/*
3995 		 * Wait for credits to resend this rdata.
3996 		 * Note: we are attempting to resend the whole rdata rather
3997 		 * than in segments
3998 		 */
3999 		do {
4000 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
4001 						&rsize, &credits);
4002 
4003 			if (rc)
4004 				goto fail;
4005 
4006 			if (rsize < rdata->bytes) {
4007 				add_credits_and_wake_if(server, &credits, 0);
4008 				msleep(1000);
4009 			}
4010 		} while (rsize < rdata->bytes);
4011 		rdata->credits = credits;
4012 
4013 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4014 		if (!rc) {
4015 			if (rdata->cfile->invalidHandle)
4016 				rc = -EAGAIN;
4017 			else {
4018 #ifdef CONFIG_CIFS_SMB_DIRECT
4019 				if (rdata->mr) {
4020 					rdata->mr->need_invalidate = true;
4021 					smbd_deregister_mr(rdata->mr);
4022 					rdata->mr = NULL;
4023 				}
4024 #endif
4025 				rc = server->ops->async_readv(rdata);
4026 			}
4027 		}
4028 
4029 		/* If the read was successfully sent, we are done */
4030 		if (!rc) {
4031 			/* Add to aio pending list */
4032 			list_add_tail(&rdata->list, rdata_list);
4033 			return 0;
4034 		}
4035 
4036 		/* Roll back credits and retry if needed */
4037 		add_credits_and_wake_if(server, &rdata->credits, 0);
4038 	} while (rc == -EAGAIN);
4039 
4040 fail:
4041 	kref_put(&rdata->refcount, cifs_readdata_release);
4042 	return rc;
4043 }
4044 
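/*
 * Split an uncached read into rsize-limited (and, over smbdirect,
 * segment-limited) chunks and issue each one as an async read, queueing the
 * resulting rdata structures on rdata_list.
 */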
4045 static int
4046 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4047 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4048 		     struct cifs_aio_ctx *ctx)
4049 {
4050 	struct cifs_readdata *rdata;
4051 	unsigned int rsize, nsegs, max_segs = INT_MAX;
4052 	struct cifs_credits credits_on_stack;
4053 	struct cifs_credits *credits = &credits_on_stack;
4054 	size_t cur_len, max_len;
4055 	int rc;
4056 	pid_t pid;
4057 	struct TCP_Server_Info *server;
4058 
4059 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4060 
4061 #ifdef CONFIG_CIFS_SMB_DIRECT
4062 	if (server->smbd_conn)
4063 		max_segs = server->smbd_conn->max_frmr_depth;
4064 #endif
4065 
4066 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4067 		pid = open_file->pid;
4068 	else
4069 		pid = current->tgid;
4070 
4071 	do {
4072 		if (open_file->invalidHandle) {
4073 			rc = cifs_reopen_file(open_file, true);
4074 			if (rc == -EAGAIN)
4075 				continue;
4076 			else if (rc)
4077 				break;
4078 		}
4079 
4080 		if (cifs_sb->ctx->rsize == 0)
4081 			cifs_sb->ctx->rsize =
4082 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4083 							     cifs_sb->ctx);
4084 
4085 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4086 						   &rsize, credits);
4087 		if (rc)
4088 			break;
4089 
4090 		max_len = min_t(size_t, len, rsize);
4091 
4092 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4093 						 max_segs, &nsegs);
4094 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4095 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4096 		if (cur_len == 0) {
4097 			rc = -EIO;
4098 			add_credits_and_wake_if(server, credits, 0);
4099 			break;
4100 		}
4101 
4102 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4103 		if (!rdata) {
4104 			add_credits_and_wake_if(server, credits, 0);
4105 			rc = -ENOMEM;
4106 			break;
4107 		}
4108 
4109 		rdata->server	= server;
4110 		rdata->cfile	= cifsFileInfo_get(open_file);
4111 		rdata->offset	= fpos;
4112 		rdata->bytes	= cur_len;
4113 		rdata->pid	= pid;
4114 		rdata->credits	= credits_on_stack;
4115 		rdata->ctx	= ctx;
4116 		kref_get(&ctx->refcount);
4117 
4118 		rdata->iter	= ctx->iter;
4119 		iov_iter_truncate(&rdata->iter, cur_len);
4120 
4121 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4122 
4123 		if (!rc) {
4124 			if (rdata->cfile->invalidHandle)
4125 				rc = -EAGAIN;
4126 			else
4127 				rc = server->ops->async_readv(rdata);
4128 		}
4129 
4130 		if (rc) {
4131 			add_credits_and_wake_if(server, &rdata->credits, 0);
4132 			kref_put(&rdata->refcount, cifs_readdata_release);
4133 			if (rc == -EAGAIN)
4134 				continue;
4135 			break;
4136 		}
4137 
4138 		list_add_tail(&rdata->list, rdata_list);
4139 		iov_iter_advance(&ctx->iter, cur_len);
4140 		fpos += cur_len;
4141 		len -= cur_len;
4142 	} while (len > 0);
4143 
4144 	return rc;
4145 }
4146 
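/*
 * Reap the replies to the queued uncached reads in order of increasing
 * offset, resending any request that failed with -EAGAIN, and complete the
 * aio context with the total number of bytes read or the first error.
 */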
4147 static void
4148 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4149 {
4150 	struct cifs_readdata *rdata, *tmp;
4151 	struct cifs_sb_info *cifs_sb;
4152 	int rc;
4153 
4154 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4155 
4156 	mutex_lock(&ctx->aio_mutex);
4157 
4158 	if (list_empty(&ctx->list)) {
4159 		mutex_unlock(&ctx->aio_mutex);
4160 		return;
4161 	}
4162 
4163 	rc = ctx->rc;
4164 	/* the loop below should proceed in the order of increasing offsets */
4165 again:
4166 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4167 		if (!rc) {
4168 			if (!try_wait_for_completion(&rdata->done)) {
4169 				mutex_unlock(&ctx->aio_mutex);
4170 				return;
4171 			}
4172 
4173 			if (rdata->result == -EAGAIN) {
4174 				/* resend call if it's a retryable error */
4175 				struct list_head tmp_list;
4176 				unsigned int got_bytes = rdata->got_bytes;
4177 
4178 				list_del_init(&rdata->list);
4179 				INIT_LIST_HEAD(&tmp_list);
4180 
4181 				if (ctx->direct_io) {
4182 					/*
4183 					 * Re-use rdata as this is a
4184 					 * direct I/O
4185 					 */
4186 					rc = cifs_resend_rdata(
4187 						rdata,
4188 						&tmp_list, ctx);
4189 				} else {
4190 					rc = cifs_send_async_read(
4191 						rdata->offset + got_bytes,
4192 						rdata->bytes - got_bytes,
4193 						rdata->cfile, cifs_sb,
4194 						&tmp_list, ctx);
4195 
4196 					kref_put(&rdata->refcount,
4197 						cifs_readdata_release);
4198 				}
4199 
4200 				list_splice(&tmp_list, &ctx->list);
4201 
4202 				goto again;
4203 			} else if (rdata->result)
4204 				rc = rdata->result;
4205 
4206 			/* if there was a short read -- discard anything left */
4207 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4208 				rc = -ENODATA;
4209 
4210 			ctx->total_len += rdata->got_bytes;
4211 		}
4212 		list_del_init(&rdata->list);
4213 		kref_put(&rdata->refcount, cifs_readdata_release);
4214 	}
4215 
4216 	/* mask nodata case */
4217 	if (rc == -ENODATA)
4218 		rc = 0;
4219 
4220 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4221 
4222 	mutex_unlock(&ctx->aio_mutex);
4223 
4224 	if (ctx->iocb && ctx->iocb->ki_complete)
4225 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4226 	else
4227 		complete(&ctx->done);
4228 }
4229 
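/*
 * Common helper for the uncached and direct read paths; mirrors
 * __cifs_writev() but, for direct I/O, also flushes any dirty pagecache over
 * the target range before issuing the reads.
 */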
4230 static ssize_t __cifs_readv(
4231 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4232 {
4233 	size_t len;
4234 	struct file *file = iocb->ki_filp;
4235 	struct cifs_sb_info *cifs_sb;
4236 	struct cifsFileInfo *cfile;
4237 	struct cifs_tcon *tcon;
4238 	ssize_t rc, total_read = 0;
4239 	loff_t offset = iocb->ki_pos;
4240 	struct cifs_aio_ctx *ctx;
4241 
4242 	len = iov_iter_count(to);
4243 	if (!len)
4244 		return 0;
4245 
4246 	cifs_sb = CIFS_FILE_SB(file);
4247 	cfile = file->private_data;
4248 	tcon = tlink_tcon(cfile->tlink);
4249 
4250 	if (!tcon->ses->server->ops->async_readv)
4251 		return -ENOSYS;
4252 
4253 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4254 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4255 
4256 	ctx = cifs_aio_ctx_alloc();
4257 	if (!ctx)
4258 		return -ENOMEM;
4259 
4260 	ctx->pos	= offset;
4261 	ctx->direct_io	= direct;
4262 	ctx->len	= len;
4263 	ctx->cfile	= cifsFileInfo_get(cfile);
4264 	ctx->nr_pinned_pages = 0;
4265 
4266 	if (!is_sync_kiocb(iocb))
4267 		ctx->iocb = iocb;
4268 
4269 	if (user_backed_iter(to)) {
4270 		/*
4271 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4272 		 * they contain references to the calling process's virtual
4273 		 * memory layout which won't be available in an async worker
4274 		 * thread.  This also takes a pin on every folio involved.
4275 		 */
4276 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4277 					     &ctx->iter, 0);
4278 		if (rc < 0) {
4279 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4280 			return rc;
4281 		}
4282 
4283 		ctx->nr_pinned_pages = rc;
4284 		ctx->bv = (void *)ctx->iter.bvec;
4285 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4286 		ctx->should_dirty = true;
4287 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4288 		   !is_sync_kiocb(iocb)) {
4289 		/*
4290 		 * If the op is asynchronous, we need to copy the list attached
4291 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4292 		 * will be retained by the caller; in any case, we may or may
4293 		 * not be able to pin the pages, so we don't try.
4294 		 */
4295 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4296 		if (!ctx->bv) {
4297 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4298 			return -ENOMEM;
4299 		}
4300 	} else {
4301 		/*
4302 		 * Otherwise, we just pass the iterator down as-is and rely on
4303 		 * the caller to make sure the pages referred to by the
4304 		 * iterator don't evaporate.
4305 		 */
4306 		ctx->iter = *to;
4307 	}
4308 
4309 	if (direct) {
4310 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4311 						  offset, offset + len - 1);
4312 		if (rc) {
4313 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4314 			return -EAGAIN;
4315 		}
4316 	}
4317 
4318 	/* grab the mutex here because read response handlers can access ctx */
4319 	mutex_lock(&ctx->aio_mutex);
4320 
4321 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4322 
4323 	/* if at least one read request send succeeded, then reset rc */
4324 	if (!list_empty(&ctx->list))
4325 		rc = 0;
4326 
4327 	mutex_unlock(&ctx->aio_mutex);
4328 
4329 	if (rc) {
4330 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4331 		return rc;
4332 	}
4333 
4334 	if (!is_sync_kiocb(iocb)) {
4335 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4336 		return -EIOCBQUEUED;
4337 	}
4338 
4339 	rc = wait_for_completion_killable(&ctx->done);
4340 	if (rc) {
4341 		mutex_lock(&ctx->aio_mutex);
4342 		ctx->rc = rc = -EINTR;
4343 		total_read = ctx->total_len;
4344 		mutex_unlock(&ctx->aio_mutex);
4345 	} else {
4346 		rc = ctx->rc;
4347 		total_read = ctx->total_len;
4348 	}
4349 
4350 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4351 
4352 	if (total_read) {
4353 		iocb->ki_pos += total_read;
4354 		return total_read;
4355 	}
4356 	return rc;
4357 }
4358 
4359 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4360 {
4361 	return __cifs_readv(iocb, to, true);
4362 }
4363 
4364 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4365 {
4366 	return __cifs_readv(iocb, to, false);
4367 }
4368 
4369 ssize_t
4370 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4371 {
4372 	struct inode *inode = file_inode(iocb->ki_filp);
4373 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4374 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4375 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4376 						iocb->ki_filp->private_data;
4377 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4378 	int rc = -EACCES;
4379 
4380 	/*
4381 	 * In strict cache mode we need to read from the server all the time
4382 	 * if we don't have a level II oplock because the server can delay the
4383 	 * mtime change - so we can't decide whether to invalidate the inode.
4384 	 * We can also fail reading pages if there are mandatory locks
4385 	 * on pages affected by this read but not on the region from pos to
4386 	 * pos+len-1.
4387 	 */
4388 	if (!CIFS_CACHE_READ(cinode))
4389 		return cifs_user_readv(iocb, to);
4390 
4391 	if (cap_unix(tcon->ses) &&
4392 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4393 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4394 		return generic_file_read_iter(iocb, to);
4395 
4396 	/*
4397 	 * We need to hold the sem to be sure nobody modifies the lock list
4398 	 * with a brlock that prevents reading.
4399 	 */
4400 	down_read(&cinode->lock_sem);
4401 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4402 				     tcon->ses->server->vals->shared_lock_type,
4403 				     0, NULL, CIFS_READ_OP))
4404 		rc = generic_file_read_iter(iocb, to);
4405 	up_read(&cinode->lock_sem);
4406 	return rc;
4407 }
4408 
4409 static ssize_t
4410 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4411 {
4412 	int rc = -EACCES;
4413 	unsigned int bytes_read = 0;
4414 	unsigned int total_read;
4415 	unsigned int current_read_size;
4416 	unsigned int rsize;
4417 	struct cifs_sb_info *cifs_sb;
4418 	struct cifs_tcon *tcon;
4419 	struct TCP_Server_Info *server;
4420 	unsigned int xid;
4421 	char *cur_offset;
4422 	struct cifsFileInfo *open_file;
4423 	struct cifs_io_parms io_parms = {0};
4424 	int buf_type = CIFS_NO_BUFFER;
4425 	__u32 pid;
4426 
4427 	xid = get_xid();
4428 	cifs_sb = CIFS_FILE_SB(file);
4429 
4430 	/* FIXME: set up handlers for larger reads and/or convert to async */
4431 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4432 
4433 	if (file->private_data == NULL) {
4434 		rc = -EBADF;
4435 		free_xid(xid);
4436 		return rc;
4437 	}
4438 	open_file = file->private_data;
4439 	tcon = tlink_tcon(open_file->tlink);
4440 	server = cifs_pick_channel(tcon->ses);
4441 
4442 	if (!server->ops->sync_read) {
4443 		free_xid(xid);
4444 		return -ENOSYS;
4445 	}
4446 
4447 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4448 		pid = open_file->pid;
4449 	else
4450 		pid = current->tgid;
4451 
4452 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4453 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4454 
4455 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4456 	     total_read += bytes_read, cur_offset += bytes_read) {
4457 		do {
4458 			current_read_size = min_t(uint, read_size - total_read,
4459 						  rsize);
4460 			/*
4461 			 * For Windows ME and 9x we do not want to request more
4462 			 * than the server negotiated since it will refuse the
4463 			 * read otherwise.
4464 			 */
4465 			if (!(tcon->ses->capabilities &
4466 				tcon->ses->server->vals->cap_large_files)) {
4467 				current_read_size = min_t(uint,
4468 					current_read_size, CIFSMaxBufSize);
4469 			}
4470 			if (open_file->invalidHandle) {
4471 				rc = cifs_reopen_file(open_file, true);
4472 				if (rc != 0)
4473 					break;
4474 			}
4475 			io_parms.pid = pid;
4476 			io_parms.tcon = tcon;
4477 			io_parms.offset = *offset;
4478 			io_parms.length = current_read_size;
4479 			io_parms.server = server;
4480 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4481 						    &bytes_read, &cur_offset,
4482 						    &buf_type);
4483 		} while (rc == -EAGAIN);
4484 
4485 		if (rc || (bytes_read == 0)) {
4486 			if (total_read) {
4487 				break;
4488 			} else {
4489 				free_xid(xid);
4490 				return rc;
4491 			}
4492 		} else {
4493 			cifs_stats_bytes_read(tcon, total_read);
4494 			*offset += bytes_read;
4495 		}
4496 	}
4497 	free_xid(xid);
4498 	return total_read;
4499 }
4500 
4501 /*
4502  * If the page is mmap'ed into a process' page tables, then we need to make
4503  * sure that it doesn't change while being written back.
4504  */
4505 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4506 {
4507 	struct folio *folio = page_folio(vmf->page);
4508 
4509 	/* Wait for the folio to be written to the cache before we allow it to
4510 	 * be modified.  We then assume the entire folio will need writing back.
4511 	 */
4512 #ifdef CONFIG_CIFS_FSCACHE
4513 	if (folio_test_fscache(folio) &&
4514 	    folio_wait_fscache_killable(folio) < 0)
4515 		return VM_FAULT_RETRY;
4516 #endif
4517 
4518 	folio_wait_writeback(folio);
4519 
4520 	if (folio_lock_killable(folio) < 0)
4521 		return VM_FAULT_RETRY;
4522 	return VM_FAULT_LOCKED;
4523 }
4524 
4525 static const struct vm_operations_struct cifs_file_vm_ops = {
4526 	.fault = filemap_fault,
4527 	.map_pages = filemap_map_pages,
4528 	.page_mkwrite = cifs_page_mkwrite,
4529 };
4530 
4531 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4532 {
4533 	int xid, rc = 0;
4534 	struct inode *inode = file_inode(file);
4535 
4536 	xid = get_xid();
4537 
4538 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4539 		rc = cifs_zap_mapping(inode);
4540 	if (!rc)
4541 		rc = generic_file_mmap(file, vma);
4542 	if (!rc)
4543 		vma->vm_ops = &cifs_file_vm_ops;
4544 
4545 	free_xid(xid);
4546 	return rc;
4547 }
4548 
4549 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4550 {
4551 	int rc, xid;
4552 
4553 	xid = get_xid();
4554 
4555 	rc = cifs_revalidate_file(file);
4556 	if (rc)
4557 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4558 			 rc);
4559 	if (!rc)
4560 		rc = generic_file_mmap(file, vma);
4561 	if (!rc)
4562 		vma->vm_ops = &cifs_file_vm_ops;
4563 
4564 	free_xid(xid);
4565 	return rc;
4566 }
4567 
4568 /*
4569  * Unlock a bunch of folios in the pagecache.
4570  */
4571 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4572 {
4573 	struct folio *folio;
4574 	XA_STATE(xas, &mapping->i_pages, first);
4575 
4576 	rcu_read_lock();
4577 	xas_for_each(&xas, folio, last) {
4578 		folio_unlock(folio);
4579 	}
4580 	rcu_read_unlock();
4581 }
4582 
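/*
 * Completion work for a readahead request: feed good data to fscache, zero
 * out any shortfall, then mark the covered folios uptodate and unlock them.
 */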
4583 static void cifs_readahead_complete(struct work_struct *work)
4584 {
4585 	struct cifs_readdata *rdata = container_of(work,
4586 						   struct cifs_readdata, work);
4587 	struct folio *folio;
4588 	pgoff_t last;
4589 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4590 
4591 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4592 
4593 	if (good)
4594 		cifs_readahead_to_fscache(rdata->mapping->host,
4595 					  rdata->offset, rdata->bytes);
4596 
4597 	if (iov_iter_count(&rdata->iter) > 0)
4598 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4599 
4600 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4601 
4602 	rcu_read_lock();
4603 	xas_for_each(&xas, folio, last) {
4604 		if (good) {
4605 			flush_dcache_folio(folio);
4606 			folio_mark_uptodate(folio);
4607 		}
4608 		folio_unlock(folio);
4609 	}
4610 	rcu_read_unlock();
4611 
4612 	kref_put(&rdata->refcount, cifs_readdata_release);
4613 }
4614 
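/*
 * Service a readahead request as a series of credit- and rsize-limited async
 * reads, letting fscache satisfy any ranges it already holds.
 */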
4615 static void cifs_readahead(struct readahead_control *ractl)
4616 {
4617 	struct cifsFileInfo *open_file = ractl->file->private_data;
4618 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4619 	struct TCP_Server_Info *server;
4620 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4621 	unsigned int ra_pages;
4622 	pgoff_t next_cached = ULONG_MAX, ra_index;
4623 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4624 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4625 	bool check_cache = caching;
4626 	pid_t pid;
4627 	int rc = 0;
4628 
4629 	/* Note that readahead_count() lags behind our dequeuing of pages from
4630 	 * the ractl, so we have to keep track for ourselves.
4631 	 */
4632 	ra_pages = readahead_count(ractl);
4633 	ra_index = readahead_index(ractl);
4634 
4635 	xid = get_xid();
4636 
4637 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4638 		pid = open_file->pid;
4639 	else
4640 		pid = current->tgid;
4641 
4642 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4643 
4644 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4645 		 __func__, ractl->file, ractl->mapping, ra_pages);
4646 
4647 	/*
4648 	 * Chop the readahead request up into rsize-sized read requests.
4649 	 */
4650 	while ((nr_pages = ra_pages)) {
4651 		unsigned int i, rsize;
4652 		struct cifs_readdata *rdata;
4653 		struct cifs_credits credits_on_stack;
4654 		struct cifs_credits *credits = &credits_on_stack;
4655 		struct folio *folio;
4656 		pgoff_t fsize;
4657 
4658 		/*
4659 		 * Find out if we have anything cached in the range of
4660 		 * interest, and if so, where the next chunk of cached data is.
4661 		 */
4662 		if (caching) {
4663 			if (check_cache) {
4664 				rc = cifs_fscache_query_occupancy(
4665 					ractl->mapping->host, ra_index, nr_pages,
4666 					&next_cached, &cache_nr_pages);
4667 				if (rc < 0)
4668 					caching = false;
4669 				check_cache = false;
4670 			}
4671 
4672 			if (ra_index == next_cached) {
4673 				/*
4674 				 * TODO: Send a whole batch of pages to be read
4675 				 * by the cache.
4676 				 */
4677 				folio = readahead_folio(ractl);
4678 				fsize = folio_nr_pages(folio);
4679 				ra_pages -= fsize;
4680 				ra_index += fsize;
4681 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4682 							       &folio->page) < 0) {
4683 					/*
4684 					 * TODO: Deal with cache read failure
4685 					 * here, but for the moment, delegate
4686 					 * that to readpage.
4687 					 */
4688 					caching = false;
4689 				}
4690 				folio_unlock(folio);
4691 				next_cached += fsize;
4692 				cache_nr_pages -= fsize;
4693 				if (cache_nr_pages == 0)
4694 					check_cache = true;
4695 				continue;
4696 			}
4697 		}
4698 
4699 		if (open_file->invalidHandle) {
4700 			rc = cifs_reopen_file(open_file, true);
4701 			if (rc) {
4702 				if (rc == -EAGAIN)
4703 					continue;
4704 				break;
4705 			}
4706 		}
4707 
4708 		if (cifs_sb->ctx->rsize == 0)
4709 			cifs_sb->ctx->rsize =
4710 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4711 							     cifs_sb->ctx);
4712 
4713 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4714 						   &rsize, credits);
4715 		if (rc)
4716 			break;
4717 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4718 		if (next_cached != ULONG_MAX)
4719 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4720 
4721 		/*
4722 		 * Give up immediately if rsize is too small to read an entire
4723 		 * page. The VFS will fall back to readpage. We should never
4724 		 * reach this point however since we set ra_pages to 0 when the
4725 		 * rsize is smaller than a cache page.
4726 		 */
4727 		if (unlikely(!nr_pages)) {
4728 			add_credits_and_wake_if(server, credits, 0);
4729 			break;
4730 		}
4731 
4732 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4733 		if (!rdata) {
4734 			/* best to give up if we're out of mem */
4735 			add_credits_and_wake_if(server, credits, 0);
4736 			break;
4737 		}
4738 
4739 		rdata->offset	= ra_index * PAGE_SIZE;
4740 		rdata->bytes	= nr_pages * PAGE_SIZE;
4741 		rdata->cfile	= cifsFileInfo_get(open_file);
4742 		rdata->server	= server;
4743 		rdata->mapping	= ractl->mapping;
4744 		rdata->pid	= pid;
4745 		rdata->credits	= credits_on_stack;
4746 
4747 		for (i = 0; i < nr_pages; i++) {
4748 			if (!readahead_folio(ractl))
4749 				WARN_ON(1);
4750 		}
4751 		ra_pages -= nr_pages;
4752 		ra_index += nr_pages;
4753 
4754 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4755 				rdata->offset, rdata->bytes);
4756 
4757 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4758 		if (!rc) {
4759 			if (rdata->cfile->invalidHandle)
4760 				rc = -EAGAIN;
4761 			else
4762 				rc = server->ops->async_readv(rdata);
4763 		}
4764 
4765 		if (rc) {
4766 			add_credits_and_wake_if(server, &rdata->credits, 0);
4767 			cifs_unlock_folios(rdata->mapping,
4768 					   rdata->offset / PAGE_SIZE,
4769 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4770 			/* Fallback to the readpage in error/reconnect cases */
4771 			kref_put(&rdata->refcount, cifs_readdata_release);
4772 			break;
4773 		}
4774 
4775 		kref_put(&rdata->refcount, cifs_readdata_release);
4776 	}
4777 
4778 	free_xid(xid);
4779 }
4780 
4781 /*
4782  * cifs_readpage_worker must be called with the page pinned
4783  */
4784 static int cifs_readpage_worker(struct file *file, struct page *page,
4785 	loff_t *poffset)
4786 {
4787 	struct inode *inode = file_inode(file);
4788 	struct timespec64 atime, mtime;
4789 	char *read_data;
4790 	int rc;
4791 
4792 	/* Is the page cached? */
4793 	rc = cifs_readpage_from_fscache(inode, page);
4794 	if (rc == 0)
4795 		goto read_complete;
4796 
4797 	read_data = kmap(page);
4798 	/* for reads over a certain size could initiate async read ahead */
4799 
4800 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4801 
4802 	if (rc < 0)
4803 		goto io_error;
4804 	else
4805 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4806 
4807 	/* we do not want atime to be less than mtime, it broke some apps */
4808 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4809 	mtime = inode_get_mtime(inode);
4810 	if (timespec64_compare(&atime, &mtime) < 0)
4811 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4812 
4813 	if (PAGE_SIZE > rc)
4814 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4815 
4816 	flush_dcache_page(page);
4817 	SetPageUptodate(page);
4818 	rc = 0;
4819 
4820 io_error:
4821 	kunmap(page);
4822 
4823 read_complete:
4824 	unlock_page(page);
4825 	return rc;
4826 }
4827 
4828 static int cifs_read_folio(struct file *file, struct folio *folio)
4829 {
4830 	struct page *page = &folio->page;
4831 	loff_t offset = page_file_offset(page);
4832 	int rc = -EACCES;
4833 	unsigned int xid;
4834 
4835 	xid = get_xid();
4836 
4837 	if (file->private_data == NULL) {
4838 		rc = -EBADF;
4839 		free_xid(xid);
4840 		return rc;
4841 	}
4842 
4843 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4844 		 page, (int)offset, (int)offset);
4845 
4846 	rc = cifs_readpage_worker(file, page, &offset);
4847 
4848 	free_xid(xid);
4849 	return rc;
4850 }
4851 
4852 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4853 {
4854 	struct cifsFileInfo *open_file;
4855 
4856 	spin_lock(&cifs_inode->open_file_lock);
4857 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4858 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4859 			spin_unlock(&cifs_inode->open_file_lock);
4860 			return 1;
4861 		}
4862 	}
4863 	spin_unlock(&cifs_inode->open_file_lock);
4864 	return 0;
4865 }
4866 
4867 /* We do not want to update the file size from the server for inodes
4868    open for write, to avoid races with writepage extending the file.
4869    In the future we could consider allowing the inode to be refreshed
4870    only on increases in the file size, but this is tricky to do without
4871    racing with writebehind page caching in the current Linux kernel
4872    design. */
4873 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4874 			    bool from_readdir)
4875 {
4876 	if (!cifsInode)
4877 		return true;
4878 
4879 	if (is_inode_writable(cifsInode) ||
4880 		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4881 		/* This inode is open for write at least once */
4882 		struct cifs_sb_info *cifs_sb;
4883 
4884 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4885 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4886 			/* since there is no page cache to corrupt on direct I/O
4887 			   we can change the size safely */
4888 			return true;
4889 		}
4890 
4891 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4892 			return true;
4893 
4894 		return false;
4895 	} else
4896 		return true;
4897 }
4898 
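/*
 * Prepare a pagecache page for a buffered write.  If the write covers only
 * part of the page we try to bring the rest uptodate first (unless we hold a
 * read oplock and the page lies at or beyond EOF); if that read fails,
 * cifs_write_end() will fall back to a synchronous write.
 */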
4899 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4900 			loff_t pos, unsigned len,
4901 			struct page **pagep, void **fsdata)
4902 {
4903 	int oncethru = 0;
4904 	pgoff_t index = pos >> PAGE_SHIFT;
4905 	loff_t offset = pos & (PAGE_SIZE - 1);
4906 	loff_t page_start = pos & PAGE_MASK;
4907 	loff_t i_size;
4908 	struct page *page;
4909 	int rc = 0;
4910 
4911 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4912 
4913 start:
4914 	page = grab_cache_page_write_begin(mapping, index);
4915 	if (!page) {
4916 		rc = -ENOMEM;
4917 		goto out;
4918 	}
4919 
4920 	if (PageUptodate(page))
4921 		goto out;
4922 
4923 	/*
4924 	 * If we write a full page it will be up to date, no need to read from
4925 	 * the server. If the write is short, we'll end up doing a sync write
4926 	 * instead.
4927 	 */
4928 	if (len == PAGE_SIZE)
4929 		goto out;
4930 
4931 	/*
4932 	 * optimize away the read when we have an oplock, and we're not
4933 	 * expecting to use any of the data we'd be reading in. That
4934 	 * is, when the page lies beyond the EOF, or straddles the EOF
4935 	 * and the write will cover all of the existing data.
4936 	 */
4937 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4938 		i_size = i_size_read(mapping->host);
4939 		if (page_start >= i_size ||
4940 		    (offset == 0 && (pos + len) >= i_size)) {
4941 			zero_user_segments(page, 0, offset,
4942 					   offset + len,
4943 					   PAGE_SIZE);
4944 			/*
4945 			 * PageChecked means that the parts of the page
4946 			 * to which we're not writing are considered up
4947 			 * to date. Once the data is copied to the
4948 			 * page, it can be set uptodate.
4949 			 */
4950 			SetPageChecked(page);
4951 			goto out;
4952 		}
4953 	}
4954 
4955 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4956 		/*
4957 		 * might as well read a page, it is fast enough. If we get
4958 		 * an error, we don't need to return it. cifs_write_end will
4959 		 * do a sync write instead since PG_uptodate isn't set.
4960 		 */
4961 		cifs_readpage_worker(file, page, &page_start);
4962 		put_page(page);
4963 		oncethru = 1;
4964 		goto start;
4965 	} else {
4966 		/* we could try using another file handle if there is one -
4967 		   but how would we lock it to prevent close of that handle
4968 		   racing with this read? In any case
4969 		   this will be written out by write_end so is fine */
4970 	}
4971 out:
4972 	*pagep = page;
4973 	return rc;
4974 }
4975 
4976 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4977 {
4978 	if (folio_test_private(folio))
4979 		return 0;
4980 	if (folio_test_fscache(folio)) {
4981 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4982 			return false;
4983 		folio_wait_fscache(folio);
4984 	}
4985 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4986 	return true;
4987 }
4988 
4989 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4990 				 size_t length)
4991 {
4992 	folio_wait_fscache(folio);
4993 }
4994 
4995 static int cifs_launder_folio(struct folio *folio)
4996 {
4997 	int rc = 0;
4998 	loff_t range_start = folio_pos(folio);
4999 	loff_t range_end = range_start + folio_size(folio);
5000 	struct writeback_control wbc = {
5001 		.sync_mode = WB_SYNC_ALL,
5002 		.nr_to_write = 0,
5003 		.range_start = range_start,
5004 		.range_end = range_end,
5005 	};
5006 
5007 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5008 
5009 	if (folio_clear_dirty_for_io(folio))
5010 		rc = cifs_writepage_locked(&folio->page, &wbc);
5011 
5012 	folio_wait_fscache(folio);
5013 	return rc;
5014 }
5015 
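/*
 * Work function run when the server breaks our oplock or lease: downgrade
 * the cached state, flush (and possibly invalidate) the page cache, push any
 * cached byte-range locks to the server and then acknowledge the break
 * unless the file has already been closed.
 */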
5016 void cifs_oplock_break(struct work_struct *work)
5017 {
5018 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5019 						  oplock_break);
5020 	struct inode *inode = d_inode(cfile->dentry);
5021 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5022 	struct cifsInodeInfo *cinode = CIFS_I(inode);
5023 	struct cifs_tcon *tcon;
5024 	struct TCP_Server_Info *server;
5025 	struct tcon_link *tlink;
5026 	int rc = 0;
5027 	bool purge_cache = false, oplock_break_cancelled;
5028 	__u64 persistent_fid, volatile_fid;
5029 	__u16 net_fid;
5030 
5031 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5032 			TASK_UNINTERRUPTIBLE);
5033 
5034 	tlink = cifs_sb_tlink(cifs_sb);
5035 	if (IS_ERR(tlink))
5036 		goto out;
5037 	tcon = tlink_tcon(tlink);
5038 	server = tcon->ses->server;
5039 
5040 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5041 				      cfile->oplock_epoch, &purge_cache);
5042 
5043 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5044 						cifs_has_mand_locks(cinode)) {
5045 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5046 			 inode);
5047 		cinode->oplock = 0;
5048 	}
5049 
5050 	if (inode && S_ISREG(inode->i_mode)) {
5051 		if (CIFS_CACHE_READ(cinode))
5052 			break_lease(inode, O_RDONLY);
5053 		else
5054 			break_lease(inode, O_WRONLY);
5055 		rc = filemap_fdatawrite(inode->i_mapping);
5056 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5057 			rc = filemap_fdatawait(inode->i_mapping);
5058 			mapping_set_error(inode->i_mapping, rc);
5059 			cifs_zap_mapping(inode);
5060 		}
5061 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5062 		if (CIFS_CACHE_WRITE(cinode))
5063 			goto oplock_break_ack;
5064 	}
5065 
5066 	rc = cifs_push_locks(cfile);
5067 	if (rc)
5068 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5069 
5070 oplock_break_ack:
5071 	/*
5072 	 * When an oplock break is received and there are no active file
5073 	 * handles, only cached ones, schedule the deferred close immediately
5074 	 * so that a new open will not use the cached handle.
5075 	 */
5076 
5077 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5078 		cifs_close_deferred_file(cinode);
5079 
5080 	persistent_fid = cfile->fid.persistent_fid;
5081 	volatile_fid = cfile->fid.volatile_fid;
5082 	net_fid = cfile->fid.netfid;
5083 	oplock_break_cancelled = cfile->oplock_break_cancelled;
5084 
5085 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5086 	/*
5087 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5088 	 * an acknowledgment to be sent when the file has already been closed.
5089 	 */
5090 	spin_lock(&cinode->open_file_lock);
5091 	/* check list empty since this can race with kill_sb calling tree disconnect */
5092 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5093 		spin_unlock(&cinode->open_file_lock);
5094 		rc = server->ops->oplock_response(tcon, persistent_fid,
5095 						  volatile_fid, net_fid, cinode);
5096 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5097 	} else
5098 		spin_unlock(&cinode->open_file_lock);
5099 
5100 	cifs_put_tlink(tlink);
5101 out:
5102 	cifs_done_oplock_break(cinode);
5103 }
5104 
5105 /*
5106  * The presence of cifs_direct_io() in the address space ops vector
5107  * allows open() with the O_DIRECT flag, which would have failed otherwise.
5108  *
5109  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5110  * so this method should never be called.
5111  *
5112  * Direct IO is not yet supported in the cached mode.
5113  */
5114 static ssize_t
5115 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5116 {
5117 	/*
5118 	 * FIXME
5119 	 * Eventually need to support direct IO for non forcedirectio mounts
5120 	 */
5121 	return -EINVAL;
5122 }
5123 
5124 static int cifs_swap_activate(struct swap_info_struct *sis,
5125 			      struct file *swap_file, sector_t *span)
5126 {
5127 	struct cifsFileInfo *cfile = swap_file->private_data;
5128 	struct inode *inode = swap_file->f_mapping->host;
5129 	unsigned long blocks;
5130 	long long isize;
5131 
5132 	cifs_dbg(FYI, "swap activate\n");
5133 
5134 	if (!swap_file->f_mapping->a_ops->swap_rw)
5135 		/* Cannot support swap */
5136 		return -EINVAL;
5137 
5138 	spin_lock(&inode->i_lock);
5139 	blocks = inode->i_blocks;
5140 	isize = inode->i_size;
5141 	spin_unlock(&inode->i_lock);
5142 	if (blocks*512 < isize) {
5143 		pr_warn("swap activate: swapfile has holes\n");
5144 		return -EINVAL;
5145 	}
5146 	*span = sis->pages;
5147 
5148 	pr_warn_once("Swap support over SMB3 is experimental\n");
5149 
5150 	/*
5151 	 * TODO: consider adding ACL (or documenting how) to prevent other
5152 	 * users (on this or other systems) from reading it
5153 	 */
5154 
5155 
5156 	/* TODO: add sk_set_memalloc(inet) or similar */
5157 
5158 	if (cfile)
5159 		cfile->swapfile = true;
5160 	/*
5161 	 * TODO: Since file already open, we can't open with DENY_ALL here
5162 	 * but we could add call to grab a byte range lock to prevent others
5163 	 * from reading or writing the file
5164 	 */
5165 
5166 	sis->flags |= SWP_FS_OPS;
5167 	return add_swap_extent(sis, 0, sis->max, 0);
5168 }
5169 
5170 static void cifs_swap_deactivate(struct file *file)
5171 {
5172 	struct cifsFileInfo *cfile = file->private_data;
5173 
5174 	cifs_dbg(FYI, "swap deactivate\n");
5175 
5176 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5177 
5178 	if (cfile)
5179 		cfile->swapfile = false;
5180 
5181 	/* do we need to unpin (or unlock) the file */
5182 }
5183 
5184 const struct address_space_operations cifs_addr_ops = {
5185 	.read_folio = cifs_read_folio,
5186 	.readahead = cifs_readahead,
5187 	.writepages = cifs_writepages,
5188 	.write_begin = cifs_write_begin,
5189 	.write_end = cifs_write_end,
5190 	.dirty_folio = netfs_dirty_folio,
5191 	.release_folio = cifs_release_folio,
5192 	.direct_IO = cifs_direct_io,
5193 	.invalidate_folio = cifs_invalidate_folio,
5194 	.launder_folio = cifs_launder_folio,
5195 	.migrate_folio = filemap_migrate_folio,
5196 	/*
5197 	 * TODO: investigate and if useful we could add an is_dirty_writeback
5198 	 * helper if needed
5199 	 */
5200 	.swap_activate = cifs_swap_activate,
5201 	.swap_deactivate = cifs_swap_deactivate,
5202 };
5203 
5204 /*
5205  * cifs_readahead requires the server to support a buffer large enough to
5206  * contain the header plus one complete page of data.  Otherwise, we need
5207  * to leave cifs_readahead out of the address space operations.
5208  */
5209 const struct address_space_operations cifs_addr_ops_smallbuf = {
5210 	.read_folio = cifs_read_folio,
5211 	.writepages = cifs_writepages,
5212 	.write_begin = cifs_write_begin,
5213 	.write_end = cifs_write_end,
5214 	.dirty_folio = netfs_dirty_folio,
5215 	.release_folio = cifs_release_folio,
5216 	.invalidate_folio = cifs_invalidate_folio,
5217 	.launder_folio = cifs_launder_folio,
5218 	.migrate_folio = filemap_migrate_folio,
5219 };
5220