xref: /linux/fs/locks.c (revision 50a483405c420f5f35b8dbb71425459835ae44eb)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   *  linux/fs/locks.c
4   *
5   * We implement four types of file locks: BSD locks, posix locks, open
6   * file description locks, and leases.  For details about BSD locks,
7   * see the flock(2) man page; for details about the other three, see
8   * fcntl(2).
9   *
10   *
11   * Locking conflicts and dependencies:
12   * If multiple threads attempt to lock the same byte (or flock the same file)
13   * only one can be granted the lock, and other must wait their turn.
14   * The first lock has been "applied" or "granted", the others are "waiting"
15   * and are "blocked" by the "applied" lock..
16   *
17   * Waiting and applied locks are all kept in trees whose properties are:
18   *
19   *	- the root of a tree may be an applied or waiting lock.
20   *	- every other node in the tree is a waiting lock that
21   *	  conflicts with every ancestor of that node.
22   *
23   * Every such tree begins life as a waiting singleton which obviously
24   * satisfies the above properties.
25   *
26   * The only ways we modify trees preserve these properties:
27   *
28   *	1. We may add a new leaf node, but only after first verifying that it
29   *	   conflicts with all of its ancestors.
30   *	2. We may remove the root of a tree, creating a new singleton
31   *	   tree from the root and N new trees rooted in the immediate
32   *	   children.
33   *	3. If the root of a tree is not currently an applied lock, we may
34   *	   apply it (if possible).
35   *	4. We may upgrade the root of the tree (either extend its range,
36   *	   or upgrade its entire range from read to write).
37   *
38   * When an applied lock is modified in a way that reduces or downgrades any
39   * part of its range, we remove all its children (2 above).  This particularly
40   * happens when a lock is unlocked.
41   *
42   * For each of those child trees we "wake up" the thread which is
43   * waiting for the lock so it can continue handling as follows: if the
44   * root of the tree applies, we do so (3).  If it doesn't, it must
45   * conflict with some applied lock.  We remove (wake up) all of its children
46   * (2), and add it is a new leaf to the tree rooted in the applied
47   * lock (1).  We then repeat the process recursively with those
48   * children.
49   *
50   */
51  
52  #include <linux/capability.h>
53  #include <linux/file.h>
54  #include <linux/fdtable.h>
55  #include <linux/fs.h>
56  #include <linux/init.h>
57  #include <linux/security.h>
58  #include <linux/slab.h>
59  #include <linux/syscalls.h>
60  #include <linux/time.h>
61  #include <linux/rcupdate.h>
62  #include <linux/pid_namespace.h>
63  #include <linux/hashtable.h>
64  #include <linux/percpu.h>
65  
66  #define CREATE_TRACE_POINTS
67  #include <trace/events/filelock.h>
68  
69  #include <linux/uaccess.h>
70  
71  #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
72  #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
73  #define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
74  #define IS_OFDLCK(fl)	(fl->fl_flags & FL_OFDLCK)
75  #define IS_REMOTELCK(fl)	(fl->fl_pid <= 0)
76  
77  static bool lease_breaking(struct file_lock *fl)
78  {
79  	return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
80  }
81  
82  static int target_leasetype(struct file_lock *fl)
83  {
84  	if (fl->fl_flags & FL_UNLOCK_PENDING)
85  		return F_UNLCK;
86  	if (fl->fl_flags & FL_DOWNGRADE_PENDING)
87  		return F_RDLCK;
88  	return fl->fl_type;
89  }
90  
91  int leases_enable = 1;
92  int lease_break_time = 45;
93  
94  /*
95   * The global file_lock_list is only used for displaying /proc/locks, so we
96   * keep a list on each CPU, with each list protected by its own spinlock.
97   * Global serialization is done using file_rwsem.
98   *
99   * Note that alterations to the list also require that the relevant flc_lock is
100   * held.
101   */
102  struct file_lock_list_struct {
103  	spinlock_t		lock;
104  	struct hlist_head	hlist;
105  };
106  static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
107  DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
108  
109  
110  /*
111   * The blocked_hash is used to find POSIX lock loops for deadlock detection.
112   * It is protected by blocked_lock_lock.
113   *
114   * We hash locks by lockowner in order to optimize searching for the lock a
115   * particular lockowner is waiting on.
116   *
117   * FIXME: make this value scale via some heuristic? We generally will want more
118   * buckets when we have more lockowners holding locks, but that's a little
119   * difficult to determine without knowing what the workload will look like.
120   */
121  #define BLOCKED_HASH_BITS	7
122  static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
123  
124  /*
125   * This lock protects the blocked_hash. Generally, if you're accessing it, you
126   * want to be holding this lock.
127   *
128   * In addition, it also protects the fl->fl_blocked_requests list, and the
129   * fl->fl_blocker pointer for file_lock structures that are acting as lock
130   * requests (in contrast to those that are acting as records of acquired locks).
131   *
132   * Note that when we acquire this lock in order to change the above fields,
133   * we often hold the flc_lock as well. In certain cases, when reading the fields
134   * protected by this lock, we can skip acquiring it iff we already hold the
135   * flc_lock.
136   */
137  static DEFINE_SPINLOCK(blocked_lock_lock);
138  
139  static struct kmem_cache *flctx_cache __read_mostly;
140  static struct kmem_cache *filelock_cache __read_mostly;
141  
142  static struct file_lock_context *
143  locks_get_lock_context(struct inode *inode, int type)
144  {
145  	struct file_lock_context *ctx;
146  
147  	/* paired with cmpxchg() below */
148  	ctx = smp_load_acquire(&inode->i_flctx);
149  	if (likely(ctx) || type == F_UNLCK)
150  		goto out;
151  
152  	ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
153  	if (!ctx)
154  		goto out;
155  
156  	spin_lock_init(&ctx->flc_lock);
157  	INIT_LIST_HEAD(&ctx->flc_flock);
158  	INIT_LIST_HEAD(&ctx->flc_posix);
159  	INIT_LIST_HEAD(&ctx->flc_lease);
160  
161  	/*
162  	 * Assign the pointer if it's not already assigned. If it is, then
163  	 * free the context we just allocated.
164  	 */
165  	if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
166  		kmem_cache_free(flctx_cache, ctx);
167  		ctx = smp_load_acquire(&inode->i_flctx);
168  	}
169  out:
170  	trace_locks_get_lock_context(inode, type, ctx);
171  	return ctx;
172  }
173  
174  static void
175  locks_dump_ctx_list(struct list_head *list, char *list_type)
176  {
177  	struct file_lock *fl;
178  
179  	list_for_each_entry(fl, list, fl_list) {
180  		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
181  	}
182  }
183  
184  static void
185  locks_check_ctx_lists(struct inode *inode)
186  {
187  	struct file_lock_context *ctx = inode->i_flctx;
188  
189  	if (unlikely(!list_empty(&ctx->flc_flock) ||
190  		     !list_empty(&ctx->flc_posix) ||
191  		     !list_empty(&ctx->flc_lease))) {
192  		pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
193  			MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
194  			inode->i_ino);
195  		locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
196  		locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
197  		locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
198  	}
199  }
200  
201  static void
202  locks_check_ctx_file_list(struct file *filp, struct list_head *list,
203  				char *list_type)
204  {
205  	struct file_lock *fl;
206  	struct inode *inode = locks_inode(filp);
207  
208  	list_for_each_entry(fl, list, fl_list)
209  		if (fl->fl_file == filp)
210  			pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx "
211  				" fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
212  				list_type, MAJOR(inode->i_sb->s_dev),
213  				MINOR(inode->i_sb->s_dev), inode->i_ino,
214  				fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
215  }
216  
217  void
218  locks_free_lock_context(struct inode *inode)
219  {
220  	struct file_lock_context *ctx = inode->i_flctx;
221  
222  	if (unlikely(ctx)) {
223  		locks_check_ctx_lists(inode);
224  		kmem_cache_free(flctx_cache, ctx);
225  	}
226  }
227  
228  static void locks_init_lock_heads(struct file_lock *fl)
229  {
230  	INIT_HLIST_NODE(&fl->fl_link);
231  	INIT_LIST_HEAD(&fl->fl_list);
232  	INIT_LIST_HEAD(&fl->fl_blocked_requests);
233  	INIT_LIST_HEAD(&fl->fl_blocked_member);
234  	init_waitqueue_head(&fl->fl_wait);
235  }
236  
237  /* Allocate an empty lock structure. */
238  struct file_lock *locks_alloc_lock(void)
239  {
240  	struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
241  
242  	if (fl)
243  		locks_init_lock_heads(fl);
244  
245  	return fl;
246  }
247  EXPORT_SYMBOL_GPL(locks_alloc_lock);
248  
249  void locks_release_private(struct file_lock *fl)
250  {
251  	BUG_ON(waitqueue_active(&fl->fl_wait));
252  	BUG_ON(!list_empty(&fl->fl_list));
253  	BUG_ON(!list_empty(&fl->fl_blocked_requests));
254  	BUG_ON(!list_empty(&fl->fl_blocked_member));
255  	BUG_ON(!hlist_unhashed(&fl->fl_link));
256  
257  	if (fl->fl_ops) {
258  		if (fl->fl_ops->fl_release_private)
259  			fl->fl_ops->fl_release_private(fl);
260  		fl->fl_ops = NULL;
261  	}
262  
263  	if (fl->fl_lmops) {
264  		if (fl->fl_lmops->lm_put_owner) {
265  			fl->fl_lmops->lm_put_owner(fl->fl_owner);
266  			fl->fl_owner = NULL;
267  		}
268  		fl->fl_lmops = NULL;
269  	}
270  }
271  EXPORT_SYMBOL_GPL(locks_release_private);
272  
273  /* Free a lock which is not in use. */
274  void locks_free_lock(struct file_lock *fl)
275  {
276  	locks_release_private(fl);
277  	kmem_cache_free(filelock_cache, fl);
278  }
279  EXPORT_SYMBOL(locks_free_lock);
280  
281  static void
282  locks_dispose_list(struct list_head *dispose)
283  {
284  	struct file_lock *fl;
285  
286  	while (!list_empty(dispose)) {
287  		fl = list_first_entry(dispose, struct file_lock, fl_list);
288  		list_del_init(&fl->fl_list);
289  		locks_free_lock(fl);
290  	}
291  }
292  
293  void locks_init_lock(struct file_lock *fl)
294  {
295  	memset(fl, 0, sizeof(struct file_lock));
296  	locks_init_lock_heads(fl);
297  }
298  EXPORT_SYMBOL(locks_init_lock);
299  
300  /*
301   * Initialize a new lock from an existing file_lock structure.
302   */
303  void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
304  {
305  	new->fl_owner = fl->fl_owner;
306  	new->fl_pid = fl->fl_pid;
307  	new->fl_file = NULL;
308  	new->fl_flags = fl->fl_flags;
309  	new->fl_type = fl->fl_type;
310  	new->fl_start = fl->fl_start;
311  	new->fl_end = fl->fl_end;
312  	new->fl_lmops = fl->fl_lmops;
313  	new->fl_ops = NULL;
314  
315  	if (fl->fl_lmops) {
316  		if (fl->fl_lmops->lm_get_owner)
317  			fl->fl_lmops->lm_get_owner(fl->fl_owner);
318  	}
319  }
320  EXPORT_SYMBOL(locks_copy_conflock);
321  
322  void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
323  {
324  	/* "new" must be a freshly-initialized lock */
325  	WARN_ON_ONCE(new->fl_ops);
326  
327  	locks_copy_conflock(new, fl);
328  
329  	new->fl_file = fl->fl_file;
330  	new->fl_ops = fl->fl_ops;
331  
332  	if (fl->fl_ops) {
333  		if (fl->fl_ops->fl_copy_lock)
334  			fl->fl_ops->fl_copy_lock(new, fl);
335  	}
336  }
337  EXPORT_SYMBOL(locks_copy_lock);
338  
339  static void locks_move_blocks(struct file_lock *new, struct file_lock *fl)
340  {
341  	struct file_lock *f;
342  
343  	/*
344  	 * As ctx->flc_lock is held, new requests cannot be added to
345  	 * ->fl_blocked_requests, so we don't need a lock to check if it
346  	 * is empty.
347  	 */
348  	if (list_empty(&fl->fl_blocked_requests))
349  		return;
350  	spin_lock(&blocked_lock_lock);
351  	list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests);
352  	list_for_each_entry(f, &new->fl_blocked_requests, fl_blocked_member)
353  		f->fl_blocker = new;
354  	spin_unlock(&blocked_lock_lock);
355  }
356  
357  static inline int flock_translate_cmd(int cmd) {
358  	switch (cmd) {
359  	case LOCK_SH:
360  		return F_RDLCK;
361  	case LOCK_EX:
362  		return F_WRLCK;
363  	case LOCK_UN:
364  		return F_UNLCK;
365  	}
366  	return -EINVAL;
367  }
368  
369  /* Fill in a file_lock structure with an appropriate FLOCK lock. */
370  static struct file_lock *
371  flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
372  {
373  	int type = flock_translate_cmd(cmd);
374  
375  	if (type < 0)
376  		return ERR_PTR(type);
377  
378  	if (fl == NULL) {
379  		fl = locks_alloc_lock();
380  		if (fl == NULL)
381  			return ERR_PTR(-ENOMEM);
382  	} else {
383  		locks_init_lock(fl);
384  	}
385  
386  	fl->fl_file = filp;
387  	fl->fl_owner = filp;
388  	fl->fl_pid = current->tgid;
389  	fl->fl_flags = FL_FLOCK;
390  	fl->fl_type = type;
391  	fl->fl_end = OFFSET_MAX;
392  
393  	return fl;
394  }
395  
396  static int assign_type(struct file_lock *fl, long type)
397  {
398  	switch (type) {
399  	case F_RDLCK:
400  	case F_WRLCK:
401  	case F_UNLCK:
402  		fl->fl_type = type;
403  		break;
404  	default:
405  		return -EINVAL;
406  	}
407  	return 0;
408  }
409  
410  static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
411  				 struct flock64 *l)
412  {
413  	switch (l->l_whence) {
414  	case SEEK_SET:
415  		fl->fl_start = 0;
416  		break;
417  	case SEEK_CUR:
418  		fl->fl_start = filp->f_pos;
419  		break;
420  	case SEEK_END:
421  		fl->fl_start = i_size_read(file_inode(filp));
422  		break;
423  	default:
424  		return -EINVAL;
425  	}
426  	if (l->l_start > OFFSET_MAX - fl->fl_start)
427  		return -EOVERFLOW;
428  	fl->fl_start += l->l_start;
429  	if (fl->fl_start < 0)
430  		return -EINVAL;
431  
432  	/* POSIX-1996 leaves the case l->l_len < 0 undefined;
433  	   POSIX-2001 defines it. */
434  	if (l->l_len > 0) {
435  		if (l->l_len - 1 > OFFSET_MAX - fl->fl_start)
436  			return -EOVERFLOW;
437  		fl->fl_end = fl->fl_start + (l->l_len - 1);
438  
439  	} else if (l->l_len < 0) {
440  		if (fl->fl_start + l->l_len < 0)
441  			return -EINVAL;
442  		fl->fl_end = fl->fl_start - 1;
443  		fl->fl_start += l->l_len;
444  	} else
445  		fl->fl_end = OFFSET_MAX;
446  
447  	fl->fl_owner = current->files;
448  	fl->fl_pid = current->tgid;
449  	fl->fl_file = filp;
450  	fl->fl_flags = FL_POSIX;
451  	fl->fl_ops = NULL;
452  	fl->fl_lmops = NULL;
453  
454  	return assign_type(fl, l->l_type);
455  }
456  
457  /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
458   * style lock.
459   */
460  static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
461  			       struct flock *l)
462  {
463  	struct flock64 ll = {
464  		.l_type = l->l_type,
465  		.l_whence = l->l_whence,
466  		.l_start = l->l_start,
467  		.l_len = l->l_len,
468  	};
469  
470  	return flock64_to_posix_lock(filp, fl, &ll);
471  }
472  
473  /* default lease lock manager operations */
474  static bool
475  lease_break_callback(struct file_lock *fl)
476  {
477  	kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
478  	return false;
479  }
480  
481  static void
482  lease_setup(struct file_lock *fl, void **priv)
483  {
484  	struct file *filp = fl->fl_file;
485  	struct fasync_struct *fa = *priv;
486  
487  	/*
488  	 * fasync_insert_entry() returns the old entry if any. If there was no
489  	 * old entry, then it used "priv" and inserted it into the fasync list.
490  	 * Clear the pointer to indicate that it shouldn't be freed.
491  	 */
492  	if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa))
493  		*priv = NULL;
494  
495  	__f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);
496  }
497  
498  static const struct lock_manager_operations lease_manager_ops = {
499  	.lm_break = lease_break_callback,
500  	.lm_change = lease_modify,
501  	.lm_setup = lease_setup,
502  };
503  
504  /*
505   * Initialize a lease, use the default lock manager operations
506   */
507  static int lease_init(struct file *filp, long type, struct file_lock *fl)
508  {
509  	if (assign_type(fl, type) != 0)
510  		return -EINVAL;
511  
512  	fl->fl_owner = filp;
513  	fl->fl_pid = current->tgid;
514  
515  	fl->fl_file = filp;
516  	fl->fl_flags = FL_LEASE;
517  	fl->fl_start = 0;
518  	fl->fl_end = OFFSET_MAX;
519  	fl->fl_ops = NULL;
520  	fl->fl_lmops = &lease_manager_ops;
521  	return 0;
522  }
523  
524  /* Allocate a file_lock initialised to this type of lease */
525  static struct file_lock *lease_alloc(struct file *filp, long type)
526  {
527  	struct file_lock *fl = locks_alloc_lock();
528  	int error = -ENOMEM;
529  
530  	if (fl == NULL)
531  		return ERR_PTR(error);
532  
533  	error = lease_init(filp, type, fl);
534  	if (error) {
535  		locks_free_lock(fl);
536  		return ERR_PTR(error);
537  	}
538  	return fl;
539  }
540  
541  /* Check if two locks overlap each other.
542   */
543  static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
544  {
545  	return ((fl1->fl_end >= fl2->fl_start) &&
546  		(fl2->fl_end >= fl1->fl_start));
547  }
548  
549  /*
550   * Check whether two locks have the same owner.
551   */
552  static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
553  {
554  	return fl1->fl_owner == fl2->fl_owner;
555  }
556  
557  /* Must be called with the flc_lock held! */
558  static void locks_insert_global_locks(struct file_lock *fl)
559  {
560  	struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
561  
562  	percpu_rwsem_assert_held(&file_rwsem);
563  
564  	spin_lock(&fll->lock);
565  	fl->fl_link_cpu = smp_processor_id();
566  	hlist_add_head(&fl->fl_link, &fll->hlist);
567  	spin_unlock(&fll->lock);
568  }
569  
570  /* Must be called with the flc_lock held! */
571  static void locks_delete_global_locks(struct file_lock *fl)
572  {
573  	struct file_lock_list_struct *fll;
574  
575  	percpu_rwsem_assert_held(&file_rwsem);
576  
577  	/*
578  	 * Avoid taking lock if already unhashed. This is safe since this check
579  	 * is done while holding the flc_lock, and new insertions into the list
580  	 * also require that it be held.
581  	 */
582  	if (hlist_unhashed(&fl->fl_link))
583  		return;
584  
585  	fll = per_cpu_ptr(&file_lock_list, fl->fl_link_cpu);
586  	spin_lock(&fll->lock);
587  	hlist_del_init(&fl->fl_link);
588  	spin_unlock(&fll->lock);
589  }
590  
591  static unsigned long
592  posix_owner_key(struct file_lock *fl)
593  {
594  	return (unsigned long)fl->fl_owner;
595  }
596  
597  static void locks_insert_global_blocked(struct file_lock *waiter)
598  {
599  	lockdep_assert_held(&blocked_lock_lock);
600  
601  	hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
602  }
603  
604  static void locks_delete_global_blocked(struct file_lock *waiter)
605  {
606  	lockdep_assert_held(&blocked_lock_lock);
607  
608  	hash_del(&waiter->fl_link);
609  }
610  
611  /* Remove waiter from blocker's block list.
612   * When blocker ends up pointing to itself then the list is empty.
613   *
614   * Must be called with blocked_lock_lock held.
615   */
616  static void __locks_delete_block(struct file_lock *waiter)
617  {
618  	locks_delete_global_blocked(waiter);
619  	list_del_init(&waiter->fl_blocked_member);
620  }
621  
622  static void __locks_wake_up_blocks(struct file_lock *blocker)
623  {
624  	while (!list_empty(&blocker->fl_blocked_requests)) {
625  		struct file_lock *waiter;
626  
627  		waiter = list_first_entry(&blocker->fl_blocked_requests,
628  					  struct file_lock, fl_blocked_member);
629  		__locks_delete_block(waiter);
630  		if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
631  			waiter->fl_lmops->lm_notify(waiter);
632  		else
633  			wake_up(&waiter->fl_wait);
634  
635  		/*
636  		 * The setting of fl_blocker to NULL marks the "done"
637  		 * point in deleting a block. Paired with acquire at the top
638  		 * of locks_delete_block().
639  		 */
640  		smp_store_release(&waiter->fl_blocker, NULL);
641  	}
642  }
643  
644  /**
645   *	locks_delete_block - stop waiting for a file lock
646   *	@waiter: the lock which was waiting
647   *
648   *	lockd/nfsd need to disconnect the lock while working on it.
649   */
650  int locks_delete_block(struct file_lock *waiter)
651  {
652  	int status = -ENOENT;
653  
654  	/*
655  	 * If fl_blocker is NULL, it won't be set again as this thread "owns"
656  	 * the lock and is the only one that might try to claim the lock.
657  	 *
658  	 * We use acquire/release to manage fl_blocker so that we can
659  	 * optimize away taking the blocked_lock_lock in many cases.
660  	 *
661  	 * The smp_load_acquire guarantees two things:
662  	 *
663  	 * 1/ that fl_blocked_requests can be tested locklessly. If something
664  	 * was recently added to that list it must have been in a locked region
665  	 * *before* the locked region when fl_blocker was set to NULL.
666  	 *
667  	 * 2/ that no other thread is accessing 'waiter', so it is safe to free
668  	 * it.  __locks_wake_up_blocks is careful not to touch waiter after
669  	 * fl_blocker is released.
670  	 *
671  	 * If a lockless check of fl_blocker shows it to be NULL, we know that
672  	 * no new locks can be inserted into its fl_blocked_requests list, and
673  	 * can avoid doing anything further if the list is empty.
674  	 */
675  	if (!smp_load_acquire(&waiter->fl_blocker) &&
676  	    list_empty(&waiter->fl_blocked_requests))
677  		return status;
678  
679  	spin_lock(&blocked_lock_lock);
680  	if (waiter->fl_blocker)
681  		status = 0;
682  	__locks_wake_up_blocks(waiter);
683  	__locks_delete_block(waiter);
684  
685  	/*
686  	 * The setting of fl_blocker to NULL marks the "done" point in deleting
687  	 * a block. Paired with acquire at the top of this function.
688  	 */
689  	smp_store_release(&waiter->fl_blocker, NULL);
690  	spin_unlock(&blocked_lock_lock);
691  	return status;
692  }
693  EXPORT_SYMBOL(locks_delete_block);
694  
695  /* Insert waiter into blocker's block list.
696   * We use a circular list so that processes can be easily woken up in
697   * the order they blocked. The documentation doesn't require this but
698   * it seems like the reasonable thing to do.
699   *
700   * Must be called with both the flc_lock and blocked_lock_lock held. The
701   * fl_blocked_requests list itself is protected by the blocked_lock_lock,
702   * but by ensuring that the flc_lock is also held on insertions we can avoid
703   * taking the blocked_lock_lock in some cases when we see that the
704   * fl_blocked_requests list is empty.
705   *
706   * Rather than just adding to the list, we check for conflicts with any existing
707   * waiters, and add beneath any waiter that blocks the new waiter.
708   * Thus wakeups don't happen until needed.
709   */
710  static void __locks_insert_block(struct file_lock *blocker,
711  				 struct file_lock *waiter,
712  				 bool conflict(struct file_lock *,
713  					       struct file_lock *))
714  {
715  	struct file_lock *fl;
716  	BUG_ON(!list_empty(&waiter->fl_blocked_member));
717  
718  new_blocker:
719  	list_for_each_entry(fl, &blocker->fl_blocked_requests, fl_blocked_member)
720  		if (conflict(fl, waiter)) {
721  			blocker =  fl;
722  			goto new_blocker;
723  		}
724  	waiter->fl_blocker = blocker;
725  	list_add_tail(&waiter->fl_blocked_member, &blocker->fl_blocked_requests);
726  	if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
727  		locks_insert_global_blocked(waiter);
728  
729  	/* The requests in waiter->fl_blocked are known to conflict with
730  	 * waiter, but might not conflict with blocker, or the requests
731  	 * and lock which block it.  So they all need to be woken.
732  	 */
733  	__locks_wake_up_blocks(waiter);
734  }
735  
736  /* Must be called with flc_lock held. */
737  static void locks_insert_block(struct file_lock *blocker,
738  			       struct file_lock *waiter,
739  			       bool conflict(struct file_lock *,
740  					     struct file_lock *))
741  {
742  	spin_lock(&blocked_lock_lock);
743  	__locks_insert_block(blocker, waiter, conflict);
744  	spin_unlock(&blocked_lock_lock);
745  }
746  
747  /*
748   * Wake up processes blocked waiting for blocker.
749   *
750   * Must be called with the inode->flc_lock held!
751   */
752  static void locks_wake_up_blocks(struct file_lock *blocker)
753  {
754  	/*
755  	 * Avoid taking global lock if list is empty. This is safe since new
756  	 * blocked requests are only added to the list under the flc_lock, and
757  	 * the flc_lock is always held here. Note that removal from the
758  	 * fl_blocked_requests list does not require the flc_lock, so we must
759  	 * recheck list_empty() after acquiring the blocked_lock_lock.
760  	 */
761  	if (list_empty(&blocker->fl_blocked_requests))
762  		return;
763  
764  	spin_lock(&blocked_lock_lock);
765  	__locks_wake_up_blocks(blocker);
766  	spin_unlock(&blocked_lock_lock);
767  }
768  
769  static void
770  locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before)
771  {
772  	list_add_tail(&fl->fl_list, before);
773  	locks_insert_global_locks(fl);
774  }
775  
776  static void
777  locks_unlink_lock_ctx(struct file_lock *fl)
778  {
779  	locks_delete_global_locks(fl);
780  	list_del_init(&fl->fl_list);
781  	locks_wake_up_blocks(fl);
782  }
783  
784  static void
785  locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose)
786  {
787  	locks_unlink_lock_ctx(fl);
788  	if (dispose)
789  		list_add(&fl->fl_list, dispose);
790  	else
791  		locks_free_lock(fl);
792  }
793  
794  /* Determine if lock sys_fl blocks lock caller_fl. Common functionality
795   * checks for shared/exclusive status of overlapping locks.
796   */
797  static bool locks_conflict(struct file_lock *caller_fl,
798  			   struct file_lock *sys_fl)
799  {
800  	if (sys_fl->fl_type == F_WRLCK)
801  		return true;
802  	if (caller_fl->fl_type == F_WRLCK)
803  		return true;
804  	return false;
805  }
806  
807  /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
808   * checking before calling the locks_conflict().
809   */
810  static bool posix_locks_conflict(struct file_lock *caller_fl,
811  				 struct file_lock *sys_fl)
812  {
813  	/* POSIX locks owned by the same process do not conflict with
814  	 * each other.
815  	 */
816  	if (posix_same_owner(caller_fl, sys_fl))
817  		return false;
818  
819  	/* Check whether they overlap */
820  	if (!locks_overlap(caller_fl, sys_fl))
821  		return false;
822  
823  	return locks_conflict(caller_fl, sys_fl);
824  }
825  
826  /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
827   * checking before calling the locks_conflict().
828   */
829  static bool flock_locks_conflict(struct file_lock *caller_fl,
830  				 struct file_lock *sys_fl)
831  {
832  	/* FLOCK locks referring to the same filp do not conflict with
833  	 * each other.
834  	 */
835  	if (caller_fl->fl_file == sys_fl->fl_file)
836  		return false;
837  
838  	return locks_conflict(caller_fl, sys_fl);
839  }
840  
841  void
842  posix_test_lock(struct file *filp, struct file_lock *fl)
843  {
844  	struct file_lock *cfl;
845  	struct file_lock_context *ctx;
846  	struct inode *inode = locks_inode(filp);
847  
848  	ctx = smp_load_acquire(&inode->i_flctx);
849  	if (!ctx || list_empty_careful(&ctx->flc_posix)) {
850  		fl->fl_type = F_UNLCK;
851  		return;
852  	}
853  
854  	spin_lock(&ctx->flc_lock);
855  	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
856  		if (posix_locks_conflict(fl, cfl)) {
857  			locks_copy_conflock(fl, cfl);
858  			goto out;
859  		}
860  	}
861  	fl->fl_type = F_UNLCK;
862  out:
863  	spin_unlock(&ctx->flc_lock);
864  	return;
865  }
866  EXPORT_SYMBOL(posix_test_lock);
867  
868  /*
869   * Deadlock detection:
870   *
871   * We attempt to detect deadlocks that are due purely to posix file
872   * locks.
873   *
874   * We assume that a task can be waiting for at most one lock at a time.
875   * So for any acquired lock, the process holding that lock may be
876   * waiting on at most one other lock.  That lock in turns may be held by
877   * someone waiting for at most one other lock.  Given a requested lock
878   * caller_fl which is about to wait for a conflicting lock block_fl, we
879   * follow this chain of waiters to ensure we are not about to create a
880   * cycle.
881   *
882   * Since we do this before we ever put a process to sleep on a lock, we
883   * are ensured that there is never a cycle; that is what guarantees that
884   * the while() loop in posix_locks_deadlock() eventually completes.
885   *
886   * Note: the above assumption may not be true when handling lock
887   * requests from a broken NFS client. It may also fail in the presence
888   * of tasks (such as posix threads) sharing the same open file table.
889   * To handle those cases, we just bail out after a few iterations.
890   *
891   * For FL_OFDLCK locks, the owner is the filp, not the files_struct.
892   * Because the owner is not even nominally tied to a thread of
893   * execution, the deadlock detection below can't reasonably work well. Just
894   * skip it for those.
895   *
896   * In principle, we could do a more limited deadlock detection on FL_OFDLCK
897   * locks that just checks for the case where two tasks are attempting to
898   * upgrade from read to write locks on the same inode.
899   */
900  
901  #define MAX_DEADLK_ITERATIONS 10
902  
903  /* Find a lock that the owner of the given block_fl is blocking on. */
904  static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
905  {
906  	struct file_lock *fl;
907  
908  	hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
909  		if (posix_same_owner(fl, block_fl)) {
910  			while (fl->fl_blocker)
911  				fl = fl->fl_blocker;
912  			return fl;
913  		}
914  	}
915  	return NULL;
916  }
917  
918  /* Must be called with the blocked_lock_lock held! */
919  static int posix_locks_deadlock(struct file_lock *caller_fl,
920  				struct file_lock *block_fl)
921  {
922  	int i = 0;
923  
924  	lockdep_assert_held(&blocked_lock_lock);
925  
926  	/*
927  	 * This deadlock detector can't reasonably detect deadlocks with
928  	 * FL_OFDLCK locks, since they aren't owned by a process, per-se.
929  	 */
930  	if (IS_OFDLCK(caller_fl))
931  		return 0;
932  
933  	while ((block_fl = what_owner_is_waiting_for(block_fl))) {
934  		if (i++ > MAX_DEADLK_ITERATIONS)
935  			return 0;
936  		if (posix_same_owner(caller_fl, block_fl))
937  			return 1;
938  	}
939  	return 0;
940  }
941  
942  /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
943   * after any leases, but before any posix locks.
944   *
945   * Note that if called with an FL_EXISTS argument, the caller may determine
946   * whether or not a lock was successfully freed by testing the return
947   * value for -ENOENT.
948   */
949  static int flock_lock_inode(struct inode *inode, struct file_lock *request)
950  {
951  	struct file_lock *new_fl = NULL;
952  	struct file_lock *fl;
953  	struct file_lock_context *ctx;
954  	int error = 0;
955  	bool found = false;
956  	LIST_HEAD(dispose);
957  
958  	ctx = locks_get_lock_context(inode, request->fl_type);
959  	if (!ctx) {
960  		if (request->fl_type != F_UNLCK)
961  			return -ENOMEM;
962  		return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0;
963  	}
964  
965  	if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
966  		new_fl = locks_alloc_lock();
967  		if (!new_fl)
968  			return -ENOMEM;
969  	}
970  
971  	percpu_down_read(&file_rwsem);
972  	spin_lock(&ctx->flc_lock);
973  	if (request->fl_flags & FL_ACCESS)
974  		goto find_conflict;
975  
976  	list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
977  		if (request->fl_file != fl->fl_file)
978  			continue;
979  		if (request->fl_type == fl->fl_type)
980  			goto out;
981  		found = true;
982  		locks_delete_lock_ctx(fl, &dispose);
983  		break;
984  	}
985  
986  	if (request->fl_type == F_UNLCK) {
987  		if ((request->fl_flags & FL_EXISTS) && !found)
988  			error = -ENOENT;
989  		goto out;
990  	}
991  
992  find_conflict:
993  	list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
994  		if (!flock_locks_conflict(request, fl))
995  			continue;
996  		error = -EAGAIN;
997  		if (!(request->fl_flags & FL_SLEEP))
998  			goto out;
999  		error = FILE_LOCK_DEFERRED;
1000  		locks_insert_block(fl, request, flock_locks_conflict);
1001  		goto out;
1002  	}
1003  	if (request->fl_flags & FL_ACCESS)
1004  		goto out;
1005  	locks_copy_lock(new_fl, request);
1006  	locks_move_blocks(new_fl, request);
1007  	locks_insert_lock_ctx(new_fl, &ctx->flc_flock);
1008  	new_fl = NULL;
1009  	error = 0;
1010  
1011  out:
1012  	spin_unlock(&ctx->flc_lock);
1013  	percpu_up_read(&file_rwsem);
1014  	if (new_fl)
1015  		locks_free_lock(new_fl);
1016  	locks_dispose_list(&dispose);
1017  	trace_flock_lock_inode(inode, request, error);
1018  	return error;
1019  }
1020  
1021  static int posix_lock_inode(struct inode *inode, struct file_lock *request,
1022  			    struct file_lock *conflock)
1023  {
1024  	struct file_lock *fl, *tmp;
1025  	struct file_lock *new_fl = NULL;
1026  	struct file_lock *new_fl2 = NULL;
1027  	struct file_lock *left = NULL;
1028  	struct file_lock *right = NULL;
1029  	struct file_lock_context *ctx;
1030  	int error;
1031  	bool added = false;
1032  	LIST_HEAD(dispose);
1033  
1034  	ctx = locks_get_lock_context(inode, request->fl_type);
1035  	if (!ctx)
1036  		return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
1037  
1038  	/*
1039  	 * We may need two file_lock structures for this operation,
1040  	 * so we get them in advance to avoid races.
1041  	 *
1042  	 * In some cases we can be sure, that no new locks will be needed
1043  	 */
1044  	if (!(request->fl_flags & FL_ACCESS) &&
1045  	    (request->fl_type != F_UNLCK ||
1046  	     request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
1047  		new_fl = locks_alloc_lock();
1048  		new_fl2 = locks_alloc_lock();
1049  	}
1050  
1051  	percpu_down_read(&file_rwsem);
1052  	spin_lock(&ctx->flc_lock);
1053  	/*
1054  	 * New lock request. Walk all POSIX locks and look for conflicts. If
1055  	 * there are any, either return error or put the request on the
1056  	 * blocker's list of waiters and the global blocked_hash.
1057  	 */
1058  	if (request->fl_type != F_UNLCK) {
1059  		list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1060  			if (!posix_locks_conflict(request, fl))
1061  				continue;
1062  			if (conflock)
1063  				locks_copy_conflock(conflock, fl);
1064  			error = -EAGAIN;
1065  			if (!(request->fl_flags & FL_SLEEP))
1066  				goto out;
1067  			/*
1068  			 * Deadlock detection and insertion into the blocked
1069  			 * locks list must be done while holding the same lock!
1070  			 */
1071  			error = -EDEADLK;
1072  			spin_lock(&blocked_lock_lock);
1073  			/*
1074  			 * Ensure that we don't find any locks blocked on this
1075  			 * request during deadlock detection.
1076  			 */
1077  			__locks_wake_up_blocks(request);
1078  			if (likely(!posix_locks_deadlock(request, fl))) {
1079  				error = FILE_LOCK_DEFERRED;
1080  				__locks_insert_block(fl, request,
1081  						     posix_locks_conflict);
1082  			}
1083  			spin_unlock(&blocked_lock_lock);
1084  			goto out;
1085  		}
1086  	}
1087  
1088  	/* If we're just looking for a conflict, we're done. */
1089  	error = 0;
1090  	if (request->fl_flags & FL_ACCESS)
1091  		goto out;
1092  
1093  	/* Find the first old lock with the same owner as the new lock */
1094  	list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1095  		if (posix_same_owner(request, fl))
1096  			break;
1097  	}
1098  
1099  	/* Process locks with this owner. */
1100  	list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) {
1101  		if (!posix_same_owner(request, fl))
1102  			break;
1103  
1104  		/* Detect adjacent or overlapping regions (if same lock type) */
1105  		if (request->fl_type == fl->fl_type) {
1106  			/* In all comparisons of start vs end, use
1107  			 * "start - 1" rather than "end + 1". If end
1108  			 * is OFFSET_MAX, end + 1 will become negative.
1109  			 */
1110  			if (fl->fl_end < request->fl_start - 1)
1111  				continue;
1112  			/* If the next lock in the list has entirely bigger
1113  			 * addresses than the new one, insert the lock here.
1114  			 */
1115  			if (fl->fl_start - 1 > request->fl_end)
1116  				break;
1117  
1118  			/* If we come here, the new and old lock are of the
1119  			 * same type and adjacent or overlapping. Make one
1120  			 * lock yielding from the lower start address of both
1121  			 * locks to the higher end address.
1122  			 */
1123  			if (fl->fl_start > request->fl_start)
1124  				fl->fl_start = request->fl_start;
1125  			else
1126  				request->fl_start = fl->fl_start;
1127  			if (fl->fl_end < request->fl_end)
1128  				fl->fl_end = request->fl_end;
1129  			else
1130  				request->fl_end = fl->fl_end;
1131  			if (added) {
1132  				locks_delete_lock_ctx(fl, &dispose);
1133  				continue;
1134  			}
1135  			request = fl;
1136  			added = true;
1137  		} else {
1138  			/* Processing for different lock types is a bit
1139  			 * more complex.
1140  			 */
1141  			if (fl->fl_end < request->fl_start)
1142  				continue;
1143  			if (fl->fl_start > request->fl_end)
1144  				break;
1145  			if (request->fl_type == F_UNLCK)
1146  				added = true;
1147  			if (fl->fl_start < request->fl_start)
1148  				left = fl;
1149  			/* If the next lock in the list has a higher end
1150  			 * address than the new one, insert the new one here.
1151  			 */
1152  			if (fl->fl_end > request->fl_end) {
1153  				right = fl;
1154  				break;
1155  			}
1156  			if (fl->fl_start >= request->fl_start) {
1157  				/* The new lock completely replaces an old
1158  				 * one (This may happen several times).
1159  				 */
1160  				if (added) {
1161  					locks_delete_lock_ctx(fl, &dispose);
1162  					continue;
1163  				}
1164  				/*
1165  				 * Replace the old lock with new_fl, and
1166  				 * remove the old one. It's safe to do the
1167  				 * insert here since we know that we won't be
1168  				 * using new_fl later, and that the lock is
1169  				 * just replacing an existing lock.
1170  				 */
1171  				error = -ENOLCK;
1172  				if (!new_fl)
1173  					goto out;
1174  				locks_copy_lock(new_fl, request);
1175  				locks_move_blocks(new_fl, request);
1176  				request = new_fl;
1177  				new_fl = NULL;
1178  				locks_insert_lock_ctx(request, &fl->fl_list);
1179  				locks_delete_lock_ctx(fl, &dispose);
1180  				added = true;
1181  			}
1182  		}
1183  	}
1184  
1185  	/*
1186  	 * The above code only modifies existing locks in case of merging or
1187  	 * replacing. If new lock(s) need to be inserted all modifications are
1188  	 * done below this, so it's safe yet to bail out.
1189  	 */
1190  	error = -ENOLCK; /* "no luck" */
1191  	if (right && left == right && !new_fl2)
1192  		goto out;
1193  
1194  	error = 0;
1195  	if (!added) {
1196  		if (request->fl_type == F_UNLCK) {
1197  			if (request->fl_flags & FL_EXISTS)
1198  				error = -ENOENT;
1199  			goto out;
1200  		}
1201  
1202  		if (!new_fl) {
1203  			error = -ENOLCK;
1204  			goto out;
1205  		}
1206  		locks_copy_lock(new_fl, request);
1207  		locks_move_blocks(new_fl, request);
1208  		locks_insert_lock_ctx(new_fl, &fl->fl_list);
1209  		fl = new_fl;
1210  		new_fl = NULL;
1211  	}
1212  	if (right) {
1213  		if (left == right) {
1214  			/* The new lock breaks the old one in two pieces,
1215  			 * so we have to use the second new lock.
1216  			 */
1217  			left = new_fl2;
1218  			new_fl2 = NULL;
1219  			locks_copy_lock(left, right);
1220  			locks_insert_lock_ctx(left, &fl->fl_list);
1221  		}
1222  		right->fl_start = request->fl_end + 1;
1223  		locks_wake_up_blocks(right);
1224  	}
1225  	if (left) {
1226  		left->fl_end = request->fl_start - 1;
1227  		locks_wake_up_blocks(left);
1228  	}
1229   out:
1230  	spin_unlock(&ctx->flc_lock);
1231  	percpu_up_read(&file_rwsem);
1232  	/*
1233  	 * Free any unused locks.
1234  	 */
1235  	if (new_fl)
1236  		locks_free_lock(new_fl);
1237  	if (new_fl2)
1238  		locks_free_lock(new_fl2);
1239  	locks_dispose_list(&dispose);
1240  	trace_posix_lock_inode(inode, request, error);
1241  
1242  	return error;
1243  }
1244  
1245  /**
1246   * posix_lock_file - Apply a POSIX-style lock to a file
1247   * @filp: The file to apply the lock to
1248   * @fl: The lock to be applied
1249   * @conflock: Place to return a copy of the conflicting lock, if found.
1250   *
1251   * Add a POSIX style lock to a file.
1252   * We merge adjacent & overlapping locks whenever possible.
1253   * POSIX locks are sorted by owner task, then by starting address
1254   *
1255   * Note that if called with an FL_EXISTS argument, the caller may determine
1256   * whether or not a lock was successfully freed by testing the return
1257   * value for -ENOENT.
1258   */
1259  int posix_lock_file(struct file *filp, struct file_lock *fl,
1260  			struct file_lock *conflock)
1261  {
1262  	return posix_lock_inode(locks_inode(filp), fl, conflock);
1263  }
1264  EXPORT_SYMBOL(posix_lock_file);
1265  
1266  /**
1267   * posix_lock_inode_wait - Apply a POSIX-style lock to a file
1268   * @inode: inode of file to which lock request should be applied
1269   * @fl: The lock to be applied
1270   *
1271   * Apply a POSIX style lock request to an inode.
1272   */
1273  static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
1274  {
1275  	int error;
1276  	might_sleep ();
1277  	for (;;) {
1278  		error = posix_lock_inode(inode, fl, NULL);
1279  		if (error != FILE_LOCK_DEFERRED)
1280  			break;
1281  		error = wait_event_interruptible(fl->fl_wait,
1282  					list_empty(&fl->fl_blocked_member));
1283  		if (error)
1284  			break;
1285  	}
1286  	locks_delete_block(fl);
1287  	return error;
1288  }
1289  
1290  static void lease_clear_pending(struct file_lock *fl, int arg)
1291  {
1292  	switch (arg) {
1293  	case F_UNLCK:
1294  		fl->fl_flags &= ~FL_UNLOCK_PENDING;
1295  		fallthrough;
1296  	case F_RDLCK:
1297  		fl->fl_flags &= ~FL_DOWNGRADE_PENDING;
1298  	}
1299  }
1300  
1301  /* We already had a lease on this file; just change its type */
1302  int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose)
1303  {
1304  	int error = assign_type(fl, arg);
1305  
1306  	if (error)
1307  		return error;
1308  	lease_clear_pending(fl, arg);
1309  	locks_wake_up_blocks(fl);
1310  	if (arg == F_UNLCK) {
1311  		struct file *filp = fl->fl_file;
1312  
1313  		f_delown(filp);
1314  		filp->f_owner.signum = 0;
1315  		fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
1316  		if (fl->fl_fasync != NULL) {
1317  			printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1318  			fl->fl_fasync = NULL;
1319  		}
1320  		locks_delete_lock_ctx(fl, dispose);
1321  	}
1322  	return 0;
1323  }
1324  EXPORT_SYMBOL(lease_modify);
1325  
1326  static bool past_time(unsigned long then)
1327  {
1328  	if (!then)
1329  		/* 0 is a special value meaning "this never expires": */
1330  		return false;
1331  	return time_after(jiffies, then);
1332  }
1333  
1334  static void time_out_leases(struct inode *inode, struct list_head *dispose)
1335  {
1336  	struct file_lock_context *ctx = inode->i_flctx;
1337  	struct file_lock *fl, *tmp;
1338  
1339  	lockdep_assert_held(&ctx->flc_lock);
1340  
1341  	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1342  		trace_time_out_leases(inode, fl);
1343  		if (past_time(fl->fl_downgrade_time))
1344  			lease_modify(fl, F_RDLCK, dispose);
1345  		if (past_time(fl->fl_break_time))
1346  			lease_modify(fl, F_UNLCK, dispose);
1347  	}
1348  }
1349  
1350  static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
1351  {
1352  	bool rc;
1353  
1354  	if (lease->fl_lmops->lm_breaker_owns_lease
1355  			&& lease->fl_lmops->lm_breaker_owns_lease(lease))
1356  		return false;
1357  	if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
1358  		rc = false;
1359  		goto trace;
1360  	}
1361  	if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) {
1362  		rc = false;
1363  		goto trace;
1364  	}
1365  
1366  	rc = locks_conflict(breaker, lease);
1367  trace:
1368  	trace_leases_conflict(rc, lease, breaker);
1369  	return rc;
1370  }
1371  
1372  static bool
1373  any_leases_conflict(struct inode *inode, struct file_lock *breaker)
1374  {
1375  	struct file_lock_context *ctx = inode->i_flctx;
1376  	struct file_lock *fl;
1377  
1378  	lockdep_assert_held(&ctx->flc_lock);
1379  
1380  	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1381  		if (leases_conflict(fl, breaker))
1382  			return true;
1383  	}
1384  	return false;
1385  }
1386  
1387  /**
1388   *	__break_lease	-	revoke all outstanding leases on file
1389   *	@inode: the inode of the file to return
1390   *	@mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
1391   *	    break all leases
1392   *	@type: FL_LEASE: break leases and delegations; FL_DELEG: break
1393   *	    only delegations
1394   *
1395   *	break_lease (inlined for speed) has checked there already is at least
1396   *	some kind of lock (maybe a lease) on this file.  Leases are broken on
1397   *	a call to open() or truncate().  This function can sleep unless you
1398   *	specified %O_NONBLOCK to your open().
1399   */
1400  int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1401  {
1402  	int error = 0;
1403  	struct file_lock_context *ctx;
1404  	struct file_lock *new_fl, *fl, *tmp;
1405  	unsigned long break_time;
1406  	int want_write = (mode & O_ACCMODE) != O_RDONLY;
1407  	LIST_HEAD(dispose);
1408  
1409  	new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1410  	if (IS_ERR(new_fl))
1411  		return PTR_ERR(new_fl);
1412  	new_fl->fl_flags = type;
1413  
1414  	/* typically we will check that ctx is non-NULL before calling */
1415  	ctx = smp_load_acquire(&inode->i_flctx);
1416  	if (!ctx) {
1417  		WARN_ON_ONCE(1);
1418  		goto free_lock;
1419  	}
1420  
1421  	percpu_down_read(&file_rwsem);
1422  	spin_lock(&ctx->flc_lock);
1423  
1424  	time_out_leases(inode, &dispose);
1425  
1426  	if (!any_leases_conflict(inode, new_fl))
1427  		goto out;
1428  
1429  	break_time = 0;
1430  	if (lease_break_time > 0) {
1431  		break_time = jiffies + lease_break_time * HZ;
1432  		if (break_time == 0)
1433  			break_time++;	/* so that 0 means no break time */
1434  	}
1435  
1436  	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1437  		if (!leases_conflict(fl, new_fl))
1438  			continue;
1439  		if (want_write) {
1440  			if (fl->fl_flags & FL_UNLOCK_PENDING)
1441  				continue;
1442  			fl->fl_flags |= FL_UNLOCK_PENDING;
1443  			fl->fl_break_time = break_time;
1444  		} else {
1445  			if (lease_breaking(fl))
1446  				continue;
1447  			fl->fl_flags |= FL_DOWNGRADE_PENDING;
1448  			fl->fl_downgrade_time = break_time;
1449  		}
1450  		if (fl->fl_lmops->lm_break(fl))
1451  			locks_delete_lock_ctx(fl, &dispose);
1452  	}
1453  
1454  	if (list_empty(&ctx->flc_lease))
1455  		goto out;
1456  
1457  	if (mode & O_NONBLOCK) {
1458  		trace_break_lease_noblock(inode, new_fl);
1459  		error = -EWOULDBLOCK;
1460  		goto out;
1461  	}
1462  
1463  restart:
1464  	fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list);
1465  	break_time = fl->fl_break_time;
1466  	if (break_time != 0)
1467  		break_time -= jiffies;
1468  	if (break_time == 0)
1469  		break_time++;
1470  	locks_insert_block(fl, new_fl, leases_conflict);
1471  	trace_break_lease_block(inode, new_fl);
1472  	spin_unlock(&ctx->flc_lock);
1473  	percpu_up_read(&file_rwsem);
1474  
1475  	locks_dispose_list(&dispose);
1476  	error = wait_event_interruptible_timeout(new_fl->fl_wait,
1477  					list_empty(&new_fl->fl_blocked_member),
1478  					break_time);
1479  
1480  	percpu_down_read(&file_rwsem);
1481  	spin_lock(&ctx->flc_lock);
1482  	trace_break_lease_unblock(inode, new_fl);
1483  	locks_delete_block(new_fl);
1484  	if (error >= 0) {
1485  		/*
1486  		 * Wait for the next conflicting lease that has not been
1487  		 * broken yet
1488  		 */
1489  		if (error == 0)
1490  			time_out_leases(inode, &dispose);
1491  		if (any_leases_conflict(inode, new_fl))
1492  			goto restart;
1493  		error = 0;
1494  	}
1495  out:
1496  	spin_unlock(&ctx->flc_lock);
1497  	percpu_up_read(&file_rwsem);
1498  	locks_dispose_list(&dispose);
1499  free_lock:
1500  	locks_free_lock(new_fl);
1501  	return error;
1502  }
1503  EXPORT_SYMBOL(__break_lease);
1504  
1505  /**
1506   *	lease_get_mtime - update modified time of an inode with exclusive lease
1507   *	@inode: the inode
1508   *      @time:  pointer to a timespec which contains the last modified time
1509   *
1510   * This is to force NFS clients to flush their caches for files with
1511   * exclusive leases.  The justification is that if someone has an
1512   * exclusive lease, then they could be modifying it.
1513   */
1514  void lease_get_mtime(struct inode *inode, struct timespec64 *time)
1515  {
1516  	bool has_lease = false;
1517  	struct file_lock_context *ctx;
1518  	struct file_lock *fl;
1519  
1520  	ctx = smp_load_acquire(&inode->i_flctx);
1521  	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1522  		spin_lock(&ctx->flc_lock);
1523  		fl = list_first_entry_or_null(&ctx->flc_lease,
1524  					      struct file_lock, fl_list);
1525  		if (fl && (fl->fl_type == F_WRLCK))
1526  			has_lease = true;
1527  		spin_unlock(&ctx->flc_lock);
1528  	}
1529  
1530  	if (has_lease)
1531  		*time = current_time(inode);
1532  }
1533  EXPORT_SYMBOL(lease_get_mtime);
1534  
1535  /**
1536   *	fcntl_getlease - Enquire what lease is currently active
1537   *	@filp: the file
1538   *
1539   *	The value returned by this function will be one of
1540   *	(if no lease break is pending):
1541   *
1542   *	%F_RDLCK to indicate a shared lease is held.
1543   *
1544   *	%F_WRLCK to indicate an exclusive lease is held.
1545   *
1546   *	%F_UNLCK to indicate no lease is held.
1547   *
1548   *	(if a lease break is pending):
1549   *
1550   *	%F_RDLCK to indicate an exclusive lease needs to be
1551   *		changed to a shared lease (or removed).
1552   *
1553   *	%F_UNLCK to indicate the lease needs to be removed.
1554   *
1555   *	XXX: sfr & willy disagree over whether F_INPROGRESS
1556   *	should be returned to userspace.
1557   */
1558  int fcntl_getlease(struct file *filp)
1559  {
1560  	struct file_lock *fl;
1561  	struct inode *inode = locks_inode(filp);
1562  	struct file_lock_context *ctx;
1563  	int type = F_UNLCK;
1564  	LIST_HEAD(dispose);
1565  
1566  	ctx = smp_load_acquire(&inode->i_flctx);
1567  	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1568  		percpu_down_read(&file_rwsem);
1569  		spin_lock(&ctx->flc_lock);
1570  		time_out_leases(inode, &dispose);
1571  		list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1572  			if (fl->fl_file != filp)
1573  				continue;
1574  			type = target_leasetype(fl);
1575  			break;
1576  		}
1577  		spin_unlock(&ctx->flc_lock);
1578  		percpu_up_read(&file_rwsem);
1579  
1580  		locks_dispose_list(&dispose);
1581  	}
1582  	return type;
1583  }
1584  
1585  /**
1586   * check_conflicting_open - see if the given file points to an inode that has
1587   *			    an existing open that would conflict with the
1588   *			    desired lease.
1589   * @filp:	file to check
1590   * @arg:	type of lease that we're trying to acquire
1591   * @flags:	current lock flags
1592   *
1593   * Check to see if there's an existing open fd on this file that would
1594   * conflict with the lease we're trying to set.
1595   */
1596  static int
1597  check_conflicting_open(struct file *filp, const long arg, int flags)
1598  {
1599  	struct inode *inode = locks_inode(filp);
1600  	int self_wcount = 0, self_rcount = 0;
1601  
1602  	if (flags & FL_LAYOUT)
1603  		return 0;
1604  	if (flags & FL_DELEG)
1605  		/* We leave these checks to the caller */
1606  		return 0;
1607  
1608  	if (arg == F_RDLCK)
1609  		return inode_is_open_for_write(inode) ? -EAGAIN : 0;
1610  	else if (arg != F_WRLCK)
1611  		return 0;
1612  
1613  	/*
1614  	 * Make sure that only read/write count is from lease requestor.
1615  	 * Note that this will result in denying write leases when i_writecount
1616  	 * is negative, which is what we want.  (We shouldn't grant write leases
1617  	 * on files open for execution.)
1618  	 */
1619  	if (filp->f_mode & FMODE_WRITE)
1620  		self_wcount = 1;
1621  	else if (filp->f_mode & FMODE_READ)
1622  		self_rcount = 1;
1623  
1624  	if (atomic_read(&inode->i_writecount) != self_wcount ||
1625  	    atomic_read(&inode->i_readcount) != self_rcount)
1626  		return -EAGAIN;
1627  
1628  	return 0;
1629  }
1630  
1631  static int
1632  generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
1633  {
1634  	struct file_lock *fl, *my_fl = NULL, *lease;
1635  	struct inode *inode = locks_inode(filp);
1636  	struct file_lock_context *ctx;
1637  	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
1638  	int error;
1639  	LIST_HEAD(dispose);
1640  
1641  	lease = *flp;
1642  	trace_generic_add_lease(inode, lease);
1643  
1644  	/* Note that arg is never F_UNLCK here */
1645  	ctx = locks_get_lock_context(inode, arg);
1646  	if (!ctx)
1647  		return -ENOMEM;
1648  
1649  	/*
1650  	 * In the delegation case we need mutual exclusion with
1651  	 * a number of operations that take the i_mutex.  We trylock
1652  	 * because delegations are an optional optimization, and if
1653  	 * there's some chance of a conflict--we'd rather not
1654  	 * bother, maybe that's a sign this just isn't a good file to
1655  	 * hand out a delegation on.
1656  	 */
1657  	if (is_deleg && !inode_trylock(inode))
1658  		return -EAGAIN;
1659  
1660  	if (is_deleg && arg == F_WRLCK) {
1661  		/* Write delegations are not currently supported: */
1662  		inode_unlock(inode);
1663  		WARN_ON_ONCE(1);
1664  		return -EINVAL;
1665  	}
1666  
1667  	percpu_down_read(&file_rwsem);
1668  	spin_lock(&ctx->flc_lock);
1669  	time_out_leases(inode, &dispose);
1670  	error = check_conflicting_open(filp, arg, lease->fl_flags);
1671  	if (error)
1672  		goto out;
1673  
1674  	/*
1675  	 * At this point, we know that if there is an exclusive
1676  	 * lease on this file, then we hold it on this filp
1677  	 * (otherwise our open of this file would have blocked).
1678  	 * And if we are trying to acquire an exclusive lease,
1679  	 * then the file is not open by anyone (including us)
1680  	 * except for this filp.
1681  	 */
1682  	error = -EAGAIN;
1683  	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1684  		if (fl->fl_file == filp &&
1685  		    fl->fl_owner == lease->fl_owner) {
1686  			my_fl = fl;
1687  			continue;
1688  		}
1689  
1690  		/*
1691  		 * No exclusive leases if someone else has a lease on
1692  		 * this file:
1693  		 */
1694  		if (arg == F_WRLCK)
1695  			goto out;
1696  		/*
1697  		 * Modifying our existing lease is OK, but no getting a
1698  		 * new lease if someone else is opening for write:
1699  		 */
1700  		if (fl->fl_flags & FL_UNLOCK_PENDING)
1701  			goto out;
1702  	}
1703  
1704  	if (my_fl != NULL) {
1705  		lease = my_fl;
1706  		error = lease->fl_lmops->lm_change(lease, arg, &dispose);
1707  		if (error)
1708  			goto out;
1709  		goto out_setup;
1710  	}
1711  
1712  	error = -EINVAL;
1713  	if (!leases_enable)
1714  		goto out;
1715  
1716  	locks_insert_lock_ctx(lease, &ctx->flc_lease);
1717  	/*
1718  	 * The check in break_lease() is lockless. It's possible for another
1719  	 * open to race in after we did the earlier check for a conflicting
1720  	 * open but before the lease was inserted. Check again for a
1721  	 * conflicting open and cancel the lease if there is one.
1722  	 *
1723  	 * We also add a barrier here to ensure that the insertion of the lock
1724  	 * precedes these checks.
1725  	 */
1726  	smp_mb();
1727  	error = check_conflicting_open(filp, arg, lease->fl_flags);
1728  	if (error) {
1729  		locks_unlink_lock_ctx(lease);
1730  		goto out;
1731  	}
1732  
1733  out_setup:
1734  	if (lease->fl_lmops->lm_setup)
1735  		lease->fl_lmops->lm_setup(lease, priv);
1736  out:
1737  	spin_unlock(&ctx->flc_lock);
1738  	percpu_up_read(&file_rwsem);
1739  	locks_dispose_list(&dispose);
1740  	if (is_deleg)
1741  		inode_unlock(inode);
1742  	if (!error && !my_fl)
1743  		*flp = NULL;
1744  	return error;
1745  }
1746  
1747  static int generic_delete_lease(struct file *filp, void *owner)
1748  {
1749  	int error = -EAGAIN;
1750  	struct file_lock *fl, *victim = NULL;
1751  	struct inode *inode = locks_inode(filp);
1752  	struct file_lock_context *ctx;
1753  	LIST_HEAD(dispose);
1754  
1755  	ctx = smp_load_acquire(&inode->i_flctx);
1756  	if (!ctx) {
1757  		trace_generic_delete_lease(inode, NULL);
1758  		return error;
1759  	}
1760  
1761  	percpu_down_read(&file_rwsem);
1762  	spin_lock(&ctx->flc_lock);
1763  	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1764  		if (fl->fl_file == filp &&
1765  		    fl->fl_owner == owner) {
1766  			victim = fl;
1767  			break;
1768  		}
1769  	}
1770  	trace_generic_delete_lease(inode, victim);
1771  	if (victim)
1772  		error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1773  	spin_unlock(&ctx->flc_lock);
1774  	percpu_up_read(&file_rwsem);
1775  	locks_dispose_list(&dispose);
1776  	return error;
1777  }
1778  
1779  /**
1780   *	generic_setlease	-	sets a lease on an open file
1781   *	@filp:	file pointer
1782   *	@arg:	type of lease to obtain
1783   *	@flp:	input - file_lock to use, output - file_lock inserted
1784   *	@priv:	private data for lm_setup (may be NULL if lm_setup
1785   *		doesn't require it)
1786   *
1787   *	The (input) flp->fl_lmops->lm_break function is required
1788   *	by break_lease().
1789   */
1790  int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
1791  			void **priv)
1792  {
1793  	struct inode *inode = locks_inode(filp);
1794  	int error;
1795  
1796  	if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
1797  		return -EACCES;
1798  	if (!S_ISREG(inode->i_mode))
1799  		return -EINVAL;
1800  	error = security_file_lock(filp, arg);
1801  	if (error)
1802  		return error;
1803  
1804  	switch (arg) {
1805  	case F_UNLCK:
1806  		return generic_delete_lease(filp, *priv);
1807  	case F_RDLCK:
1808  	case F_WRLCK:
1809  		if (!(*flp)->fl_lmops->lm_break) {
1810  			WARN_ON_ONCE(1);
1811  			return -ENOLCK;
1812  		}
1813  
1814  		return generic_add_lease(filp, arg, flp, priv);
1815  	default:
1816  		return -EINVAL;
1817  	}
1818  }
1819  EXPORT_SYMBOL(generic_setlease);
1820  
1821  #if IS_ENABLED(CONFIG_SRCU)
1822  /*
1823   * Kernel subsystems can register to be notified on any attempt to set
1824   * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
1825   * to close files that it may have cached when there is an attempt to set a
1826   * conflicting lease.
1827   */
1828  static struct srcu_notifier_head lease_notifier_chain;
1829  
1830  static inline void
1831  lease_notifier_chain_init(void)
1832  {
1833  	srcu_init_notifier_head(&lease_notifier_chain);
1834  }
1835  
1836  static inline void
1837  setlease_notifier(long arg, struct file_lock *lease)
1838  {
1839  	if (arg != F_UNLCK)
1840  		srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
1841  }
1842  
1843  int lease_register_notifier(struct notifier_block *nb)
1844  {
1845  	return srcu_notifier_chain_register(&lease_notifier_chain, nb);
1846  }
1847  EXPORT_SYMBOL_GPL(lease_register_notifier);
1848  
1849  void lease_unregister_notifier(struct notifier_block *nb)
1850  {
1851  	srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
1852  }
1853  EXPORT_SYMBOL_GPL(lease_unregister_notifier);
1854  
1855  #else /* !IS_ENABLED(CONFIG_SRCU) */
1856  static inline void
1857  lease_notifier_chain_init(void)
1858  {
1859  }
1860  
1861  static inline void
1862  setlease_notifier(long arg, struct file_lock *lease)
1863  {
1864  }
1865  
1866  int lease_register_notifier(struct notifier_block *nb)
1867  {
1868  	return 0;
1869  }
1870  EXPORT_SYMBOL_GPL(lease_register_notifier);
1871  
1872  void lease_unregister_notifier(struct notifier_block *nb)
1873  {
1874  }
1875  EXPORT_SYMBOL_GPL(lease_unregister_notifier);
1876  
1877  #endif /* IS_ENABLED(CONFIG_SRCU) */
1878  
1879  /**
1880   * vfs_setlease        -       sets a lease on an open file
1881   * @filp:	file pointer
1882   * @arg:	type of lease to obtain
1883   * @lease:	file_lock to use when adding a lease
1884   * @priv:	private info for lm_setup when adding a lease (may be
1885   *		NULL if lm_setup doesn't require it)
1886   *
1887   * Call this to establish a lease on the file. The "lease" argument is not
1888   * used for F_UNLCK requests and may be NULL. For commands that set or alter
1889   * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be
1890   * set; if not, this function will return -ENOLCK (and generate a scary-looking
1891   * stack trace).
1892   *
1893   * The "priv" pointer is passed directly to the lm_setup function as-is. It
1894   * may be NULL if the lm_setup operation doesn't require it.
1895   */
1896  int
1897  vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
1898  {
1899  	if (lease)
1900  		setlease_notifier(arg, *lease);
1901  	if (filp->f_op->setlease)
1902  		return filp->f_op->setlease(filp, arg, lease, priv);
1903  	else
1904  		return generic_setlease(filp, arg, lease, priv);
1905  }
1906  EXPORT_SYMBOL_GPL(vfs_setlease);
1907  
1908  static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1909  {
1910  	struct file_lock *fl;
1911  	struct fasync_struct *new;
1912  	int error;
1913  
1914  	fl = lease_alloc(filp, arg);
1915  	if (IS_ERR(fl))
1916  		return PTR_ERR(fl);
1917  
1918  	new = fasync_alloc();
1919  	if (!new) {
1920  		locks_free_lock(fl);
1921  		return -ENOMEM;
1922  	}
1923  	new->fa_fd = fd;
1924  
1925  	error = vfs_setlease(filp, arg, &fl, (void **)&new);
1926  	if (fl)
1927  		locks_free_lock(fl);
1928  	if (new)
1929  		fasync_free(new);
1930  	return error;
1931  }
1932  
1933  /**
1934   *	fcntl_setlease	-	sets a lease on an open file
1935   *	@fd: open file descriptor
1936   *	@filp: file pointer
1937   *	@arg: type of lease to obtain
1938   *
1939   *	Call this fcntl to establish a lease on the file.
1940   *	Note that you also need to call %F_SETSIG to
1941   *	receive a signal when the lease is broken.
1942   */
1943  int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1944  {
1945  	if (arg == F_UNLCK)
1946  		return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
1947  	return do_fcntl_add_lease(fd, filp, arg);
1948  }
1949  
1950  /**
1951   * flock_lock_inode_wait - Apply a FLOCK-style lock to a file
1952   * @inode: inode of the file to apply to
1953   * @fl: The lock to be applied
1954   *
1955   * Apply a FLOCK style lock request to an inode.
1956   */
1957  static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
1958  {
1959  	int error;
1960  	might_sleep();
1961  	for (;;) {
1962  		error = flock_lock_inode(inode, fl);
1963  		if (error != FILE_LOCK_DEFERRED)
1964  			break;
1965  		error = wait_event_interruptible(fl->fl_wait,
1966  				list_empty(&fl->fl_blocked_member));
1967  		if (error)
1968  			break;
1969  	}
1970  	locks_delete_block(fl);
1971  	return error;
1972  }
1973  
1974  /**
1975   * locks_lock_inode_wait - Apply a lock to an inode
1976   * @inode: inode of the file to apply to
1977   * @fl: The lock to be applied
1978   *
1979   * Apply a POSIX or FLOCK style lock request to an inode.
1980   */
1981  int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
1982  {
1983  	int res = 0;
1984  	switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
1985  		case FL_POSIX:
1986  			res = posix_lock_inode_wait(inode, fl);
1987  			break;
1988  		case FL_FLOCK:
1989  			res = flock_lock_inode_wait(inode, fl);
1990  			break;
1991  		default:
1992  			BUG();
1993  	}
1994  	return res;
1995  }
1996  EXPORT_SYMBOL(locks_lock_inode_wait);
1997  
1998  /**
1999   *	sys_flock: - flock() system call.
2000   *	@fd: the file descriptor to lock.
2001   *	@cmd: the type of lock to apply.
2002   *
2003   *	Apply a %FL_FLOCK style lock to an open file descriptor.
2004   *	The @cmd can be one of:
2005   *
2006   *	- %LOCK_SH -- a shared lock.
2007   *	- %LOCK_EX -- an exclusive lock.
2008   *	- %LOCK_UN -- remove an existing lock.
2009   *	- %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED)
2010   *
2011   *	%LOCK_MAND support has been removed from the kernel.
2012   */
2013  SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
2014  {
2015  	struct fd f = fdget(fd);
2016  	struct file_lock *lock;
2017  	int can_sleep, unlock;
2018  	int error;
2019  
2020  	error = -EBADF;
2021  	if (!f.file)
2022  		goto out;
2023  
2024  	can_sleep = !(cmd & LOCK_NB);
2025  	cmd &= ~LOCK_NB;
2026  	unlock = (cmd == LOCK_UN);
2027  
2028  	if (!unlock && !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
2029  		goto out_putf;
2030  
2031  	/*
2032  	 * LOCK_MAND locks were broken for a long time in that they never
2033  	 * conflicted with one another and didn't prevent any sort of open,
2034  	 * read or write activity.
2035  	 *
2036  	 * Just ignore these requests now, to preserve legacy behavior, but
2037  	 * throw a warning to let people know that they don't actually work.
2038  	 */
2039  	if (cmd & LOCK_MAND) {
2040  		pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
2041  		error = 0;
2042  		goto out_putf;
2043  	}
2044  
2045  	lock = flock_make_lock(f.file, cmd, NULL);
2046  	if (IS_ERR(lock)) {
2047  		error = PTR_ERR(lock);
2048  		goto out_putf;
2049  	}
2050  
2051  	if (can_sleep)
2052  		lock->fl_flags |= FL_SLEEP;
2053  
2054  	error = security_file_lock(f.file, lock->fl_type);
2055  	if (error)
2056  		goto out_free;
2057  
2058  	if (f.file->f_op->flock)
2059  		error = f.file->f_op->flock(f.file,
2060  					  (can_sleep) ? F_SETLKW : F_SETLK,
2061  					  lock);
2062  	else
2063  		error = locks_lock_file_wait(f.file, lock);
2064  
2065   out_free:
2066  	locks_free_lock(lock);
2067  
2068   out_putf:
2069  	fdput(f);
2070   out:
2071  	return error;
2072  }
2073  
2074  /**
2075   * vfs_test_lock - test file byte range lock
2076   * @filp: The file to test lock for
2077   * @fl: The lock to test; also used to hold result
2078   *
2079   * Returns -ERRNO on failure.  Indicates presence of conflicting lock by
2080   * setting conf->fl_type to something other than F_UNLCK.
2081   */
2082  int vfs_test_lock(struct file *filp, struct file_lock *fl)
2083  {
2084  	if (filp->f_op->lock)
2085  		return filp->f_op->lock(filp, F_GETLK, fl);
2086  	posix_test_lock(filp, fl);
2087  	return 0;
2088  }
2089  EXPORT_SYMBOL_GPL(vfs_test_lock);
2090  
2091  /**
2092   * locks_translate_pid - translate a file_lock's fl_pid number into a namespace
2093   * @fl: The file_lock who's fl_pid should be translated
2094   * @ns: The namespace into which the pid should be translated
2095   *
2096   * Used to tranlate a fl_pid into a namespace virtual pid number
2097   */
2098  static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
2099  {
2100  	pid_t vnr;
2101  	struct pid *pid;
2102  
2103  	if (IS_OFDLCK(fl))
2104  		return -1;
2105  	if (IS_REMOTELCK(fl))
2106  		return fl->fl_pid;
2107  	/*
2108  	 * If the flock owner process is dead and its pid has been already
2109  	 * freed, the translation below won't work, but we still want to show
2110  	 * flock owner pid number in init pidns.
2111  	 */
2112  	if (ns == &init_pid_ns)
2113  		return (pid_t)fl->fl_pid;
2114  
2115  	rcu_read_lock();
2116  	pid = find_pid_ns(fl->fl_pid, &init_pid_ns);
2117  	vnr = pid_nr_ns(pid, ns);
2118  	rcu_read_unlock();
2119  	return vnr;
2120  }
2121  
2122  static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
2123  {
2124  	flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
2125  #if BITS_PER_LONG == 32
2126  	/*
2127  	 * Make sure we can represent the posix lock via
2128  	 * legacy 32bit flock.
2129  	 */
2130  	if (fl->fl_start > OFFT_OFFSET_MAX)
2131  		return -EOVERFLOW;
2132  	if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
2133  		return -EOVERFLOW;
2134  #endif
2135  	flock->l_start = fl->fl_start;
2136  	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
2137  		fl->fl_end - fl->fl_start + 1;
2138  	flock->l_whence = 0;
2139  	flock->l_type = fl->fl_type;
2140  	return 0;
2141  }
2142  
2143  #if BITS_PER_LONG == 32
2144  static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
2145  {
2146  	flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
2147  	flock->l_start = fl->fl_start;
2148  	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
2149  		fl->fl_end - fl->fl_start + 1;
2150  	flock->l_whence = 0;
2151  	flock->l_type = fl->fl_type;
2152  }
2153  #endif
2154  
2155  /* Report the first existing lock that would conflict with l.
2156   * This implements the F_GETLK command of fcntl().
2157   */
2158  int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
2159  {
2160  	struct file_lock *fl;
2161  	int error;
2162  
2163  	fl = locks_alloc_lock();
2164  	if (fl == NULL)
2165  		return -ENOMEM;
2166  	error = -EINVAL;
2167  	if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
2168  		goto out;
2169  
2170  	error = flock_to_posix_lock(filp, fl, flock);
2171  	if (error)
2172  		goto out;
2173  
2174  	if (cmd == F_OFD_GETLK) {
2175  		error = -EINVAL;
2176  		if (flock->l_pid != 0)
2177  			goto out;
2178  
2179  		fl->fl_flags |= FL_OFDLCK;
2180  		fl->fl_owner = filp;
2181  	}
2182  
2183  	error = vfs_test_lock(filp, fl);
2184  	if (error)
2185  		goto out;
2186  
2187  	flock->l_type = fl->fl_type;
2188  	if (fl->fl_type != F_UNLCK) {
2189  		error = posix_lock_to_flock(flock, fl);
2190  		if (error)
2191  			goto out;
2192  	}
2193  out:
2194  	locks_free_lock(fl);
2195  	return error;
2196  }
2197  
2198  /**
2199   * vfs_lock_file - file byte range lock
2200   * @filp: The file to apply the lock to
2201   * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
2202   * @fl: The lock to be applied
2203   * @conf: Place to return a copy of the conflicting lock, if found.
2204   *
2205   * A caller that doesn't care about the conflicting lock may pass NULL
2206   * as the final argument.
2207   *
2208   * If the filesystem defines a private ->lock() method, then @conf will
2209   * be left unchanged; so a caller that cares should initialize it to
2210   * some acceptable default.
2211   *
2212   * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
2213   * locks, the ->lock() interface may return asynchronously, before the lock has
2214   * been granted or denied by the underlying filesystem, if (and only if)
2215   * lm_grant is set. Callers expecting ->lock() to return asynchronously
2216   * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
2217   * the request is for a blocking lock. When ->lock() does return asynchronously,
2218   * it must return FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock
2219   * request completes.
2220   * If the request is for non-blocking lock the file system should return
2221   * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
2222   * with the result. If the request timed out the callback routine will return a
2223   * nonzero return code and the file system should release the lock. The file
2224   * system is also responsible to keep a corresponding posix lock when it
2225   * grants a lock so the VFS can find out which locks are locally held and do
2226   * the correct lock cleanup when required.
2227   * The underlying filesystem must not drop the kernel lock or call
2228   * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED
2229   * return code.
2230   */
2231  int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
2232  {
2233  	if (filp->f_op->lock)
2234  		return filp->f_op->lock(filp, cmd, fl);
2235  	else
2236  		return posix_lock_file(filp, fl, conf);
2237  }
2238  EXPORT_SYMBOL_GPL(vfs_lock_file);
2239  
2240  static int do_lock_file_wait(struct file *filp, unsigned int cmd,
2241  			     struct file_lock *fl)
2242  {
2243  	int error;
2244  
2245  	error = security_file_lock(filp, fl->fl_type);
2246  	if (error)
2247  		return error;
2248  
2249  	for (;;) {
2250  		error = vfs_lock_file(filp, cmd, fl, NULL);
2251  		if (error != FILE_LOCK_DEFERRED)
2252  			break;
2253  		error = wait_event_interruptible(fl->fl_wait,
2254  					list_empty(&fl->fl_blocked_member));
2255  		if (error)
2256  			break;
2257  	}
2258  	locks_delete_block(fl);
2259  
2260  	return error;
2261  }
2262  
2263  /* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */
2264  static int
2265  check_fmode_for_setlk(struct file_lock *fl)
2266  {
2267  	switch (fl->fl_type) {
2268  	case F_RDLCK:
2269  		if (!(fl->fl_file->f_mode & FMODE_READ))
2270  			return -EBADF;
2271  		break;
2272  	case F_WRLCK:
2273  		if (!(fl->fl_file->f_mode & FMODE_WRITE))
2274  			return -EBADF;
2275  	}
2276  	return 0;
2277  }
2278  
2279  /* Apply the lock described by l to an open file descriptor.
2280   * This implements both the F_SETLK and F_SETLKW commands of fcntl().
2281   */
2282  int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
2283  		struct flock *flock)
2284  {
2285  	struct file_lock *file_lock = locks_alloc_lock();
2286  	struct inode *inode = locks_inode(filp);
2287  	struct file *f;
2288  	int error;
2289  
2290  	if (file_lock == NULL)
2291  		return -ENOLCK;
2292  
2293  	error = flock_to_posix_lock(filp, file_lock, flock);
2294  	if (error)
2295  		goto out;
2296  
2297  	error = check_fmode_for_setlk(file_lock);
2298  	if (error)
2299  		goto out;
2300  
2301  	/*
2302  	 * If the cmd is requesting file-private locks, then set the
2303  	 * FL_OFDLCK flag and override the owner.
2304  	 */
2305  	switch (cmd) {
2306  	case F_OFD_SETLK:
2307  		error = -EINVAL;
2308  		if (flock->l_pid != 0)
2309  			goto out;
2310  
2311  		cmd = F_SETLK;
2312  		file_lock->fl_flags |= FL_OFDLCK;
2313  		file_lock->fl_owner = filp;
2314  		break;
2315  	case F_OFD_SETLKW:
2316  		error = -EINVAL;
2317  		if (flock->l_pid != 0)
2318  			goto out;
2319  
2320  		cmd = F_SETLKW;
2321  		file_lock->fl_flags |= FL_OFDLCK;
2322  		file_lock->fl_owner = filp;
2323  		fallthrough;
2324  	case F_SETLKW:
2325  		file_lock->fl_flags |= FL_SLEEP;
2326  	}
2327  
2328  	error = do_lock_file_wait(filp, cmd, file_lock);
2329  
2330  	/*
2331  	 * Attempt to detect a close/fcntl race and recover by releasing the
2332  	 * lock that was just acquired. There is no need to do that when we're
2333  	 * unlocking though, or for OFD locks.
2334  	 */
2335  	if (!error && file_lock->fl_type != F_UNLCK &&
2336  	    !(file_lock->fl_flags & FL_OFDLCK)) {
2337  		struct files_struct *files = current->files;
2338  		/*
2339  		 * We need that spin_lock here - it prevents reordering between
2340  		 * update of i_flctx->flc_posix and check for it done in
2341  		 * close(). rcu_read_lock() wouldn't do.
2342  		 */
2343  		spin_lock(&files->file_lock);
2344  		f = files_lookup_fd_locked(files, fd);
2345  		spin_unlock(&files->file_lock);
2346  		if (f != filp) {
2347  			file_lock->fl_type = F_UNLCK;
2348  			error = do_lock_file_wait(filp, cmd, file_lock);
2349  			WARN_ON_ONCE(error);
2350  			error = -EBADF;
2351  		}
2352  	}
2353  out:
2354  	trace_fcntl_setlk(inode, file_lock, error);
2355  	locks_free_lock(file_lock);
2356  	return error;
2357  }
2358  
2359  #if BITS_PER_LONG == 32
2360  /* Report the first existing lock that would conflict with l.
2361   * This implements the F_GETLK command of fcntl().
2362   */
2363  int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
2364  {
2365  	struct file_lock *fl;
2366  	int error;
2367  
2368  	fl = locks_alloc_lock();
2369  	if (fl == NULL)
2370  		return -ENOMEM;
2371  
2372  	error = -EINVAL;
2373  	if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
2374  		goto out;
2375  
2376  	error = flock64_to_posix_lock(filp, fl, flock);
2377  	if (error)
2378  		goto out;
2379  
2380  	if (cmd == F_OFD_GETLK) {
2381  		error = -EINVAL;
2382  		if (flock->l_pid != 0)
2383  			goto out;
2384  
2385  		cmd = F_GETLK64;
2386  		fl->fl_flags |= FL_OFDLCK;
2387  		fl->fl_owner = filp;
2388  	}
2389  
2390  	error = vfs_test_lock(filp, fl);
2391  	if (error)
2392  		goto out;
2393  
2394  	flock->l_type = fl->fl_type;
2395  	if (fl->fl_type != F_UNLCK)
2396  		posix_lock_to_flock64(flock, fl);
2397  
2398  out:
2399  	locks_free_lock(fl);
2400  	return error;
2401  }
2402  
2403  /* Apply the lock described by l to an open file descriptor.
2404   * This implements both the F_SETLK and F_SETLKW commands of fcntl().
2405   */
2406  int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
2407  		struct flock64 *flock)
2408  {
2409  	struct file_lock *file_lock = locks_alloc_lock();
2410  	struct file *f;
2411  	int error;
2412  
2413  	if (file_lock == NULL)
2414  		return -ENOLCK;
2415  
2416  	error = flock64_to_posix_lock(filp, file_lock, flock);
2417  	if (error)
2418  		goto out;
2419  
2420  	error = check_fmode_for_setlk(file_lock);
2421  	if (error)
2422  		goto out;
2423  
2424  	/*
2425  	 * If the cmd is requesting file-private locks, then set the
2426  	 * FL_OFDLCK flag and override the owner.
2427  	 */
2428  	switch (cmd) {
2429  	case F_OFD_SETLK:
2430  		error = -EINVAL;
2431  		if (flock->l_pid != 0)
2432  			goto out;
2433  
2434  		cmd = F_SETLK64;
2435  		file_lock->fl_flags |= FL_OFDLCK;
2436  		file_lock->fl_owner = filp;
2437  		break;
2438  	case F_OFD_SETLKW:
2439  		error = -EINVAL;
2440  		if (flock->l_pid != 0)
2441  			goto out;
2442  
2443  		cmd = F_SETLKW64;
2444  		file_lock->fl_flags |= FL_OFDLCK;
2445  		file_lock->fl_owner = filp;
2446  		fallthrough;
2447  	case F_SETLKW64:
2448  		file_lock->fl_flags |= FL_SLEEP;
2449  	}
2450  
2451  	error = do_lock_file_wait(filp, cmd, file_lock);
2452  
2453  	/*
2454  	 * Attempt to detect a close/fcntl race and recover by releasing the
2455  	 * lock that was just acquired. There is no need to do that when we're
2456  	 * unlocking though, or for OFD locks.
2457  	 */
2458  	if (!error && file_lock->fl_type != F_UNLCK &&
2459  	    !(file_lock->fl_flags & FL_OFDLCK)) {
2460  		struct files_struct *files = current->files;
2461  		/*
2462  		 * We need that spin_lock here - it prevents reordering between
2463  		 * update of i_flctx->flc_posix and check for it done in
2464  		 * close(). rcu_read_lock() wouldn't do.
2465  		 */
2466  		spin_lock(&files->file_lock);
2467  		f = files_lookup_fd_locked(files, fd);
2468  		spin_unlock(&files->file_lock);
2469  		if (f != filp) {
2470  			file_lock->fl_type = F_UNLCK;
2471  			error = do_lock_file_wait(filp, cmd, file_lock);
2472  			WARN_ON_ONCE(error);
2473  			error = -EBADF;
2474  		}
2475  	}
2476  out:
2477  	locks_free_lock(file_lock);
2478  	return error;
2479  }
2480  #endif /* BITS_PER_LONG == 32 */
2481  
2482  /*
2483   * This function is called when the file is being removed
2484   * from the task's fd array.  POSIX locks belonging to this task
2485   * are deleted at this time.
2486   */
2487  void locks_remove_posix(struct file *filp, fl_owner_t owner)
2488  {
2489  	int error;
2490  	struct inode *inode = locks_inode(filp);
2491  	struct file_lock lock;
2492  	struct file_lock_context *ctx;
2493  
2494  	/*
2495  	 * If there are no locks held on this file, we don't need to call
2496  	 * posix_lock_file().  Another process could be setting a lock on this
2497  	 * file at the same time, but we wouldn't remove that lock anyway.
2498  	 */
2499  	ctx =  smp_load_acquire(&inode->i_flctx);
2500  	if (!ctx || list_empty(&ctx->flc_posix))
2501  		return;
2502  
2503  	locks_init_lock(&lock);
2504  	lock.fl_type = F_UNLCK;
2505  	lock.fl_flags = FL_POSIX | FL_CLOSE;
2506  	lock.fl_start = 0;
2507  	lock.fl_end = OFFSET_MAX;
2508  	lock.fl_owner = owner;
2509  	lock.fl_pid = current->tgid;
2510  	lock.fl_file = filp;
2511  	lock.fl_ops = NULL;
2512  	lock.fl_lmops = NULL;
2513  
2514  	error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
2515  
2516  	if (lock.fl_ops && lock.fl_ops->fl_release_private)
2517  		lock.fl_ops->fl_release_private(&lock);
2518  	trace_locks_remove_posix(inode, &lock, error);
2519  }
2520  EXPORT_SYMBOL(locks_remove_posix);
2521  
2522  /* The i_flctx must be valid when calling into here */
2523  static void
2524  locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
2525  {
2526  	struct file_lock fl;
2527  	struct inode *inode = locks_inode(filp);
2528  
2529  	if (list_empty(&flctx->flc_flock))
2530  		return;
2531  
2532  	flock_make_lock(filp, LOCK_UN, &fl);
2533  	fl.fl_flags |= FL_CLOSE;
2534  
2535  	if (filp->f_op->flock)
2536  		filp->f_op->flock(filp, F_SETLKW, &fl);
2537  	else
2538  		flock_lock_inode(inode, &fl);
2539  
2540  	if (fl.fl_ops && fl.fl_ops->fl_release_private)
2541  		fl.fl_ops->fl_release_private(&fl);
2542  }
2543  
2544  /* The i_flctx must be valid when calling into here */
2545  static void
2546  locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
2547  {
2548  	struct file_lock *fl, *tmp;
2549  	LIST_HEAD(dispose);
2550  
2551  	if (list_empty(&ctx->flc_lease))
2552  		return;
2553  
2554  	percpu_down_read(&file_rwsem);
2555  	spin_lock(&ctx->flc_lock);
2556  	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
2557  		if (filp == fl->fl_file)
2558  			lease_modify(fl, F_UNLCK, &dispose);
2559  	spin_unlock(&ctx->flc_lock);
2560  	percpu_up_read(&file_rwsem);
2561  
2562  	locks_dispose_list(&dispose);
2563  }
2564  
2565  /*
2566   * This function is called on the last close of an open file.
2567   */
2568  void locks_remove_file(struct file *filp)
2569  {
2570  	struct file_lock_context *ctx;
2571  
2572  	ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
2573  	if (!ctx)
2574  		return;
2575  
2576  	/* remove any OFD locks */
2577  	locks_remove_posix(filp, filp);
2578  
2579  	/* remove flock locks */
2580  	locks_remove_flock(filp, ctx);
2581  
2582  	/* remove any leases */
2583  	locks_remove_lease(filp, ctx);
2584  
2585  	spin_lock(&ctx->flc_lock);
2586  	locks_check_ctx_file_list(filp, &ctx->flc_posix, "POSIX");
2587  	locks_check_ctx_file_list(filp, &ctx->flc_flock, "FLOCK");
2588  	locks_check_ctx_file_list(filp, &ctx->flc_lease, "LEASE");
2589  	spin_unlock(&ctx->flc_lock);
2590  }
2591  
2592  /**
2593   * vfs_cancel_lock - file byte range unblock lock
2594   * @filp: The file to apply the unblock to
2595   * @fl: The lock to be unblocked
2596   *
2597   * Used by lock managers to cancel blocked requests
2598   */
2599  int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2600  {
2601  	if (filp->f_op->lock)
2602  		return filp->f_op->lock(filp, F_CANCELLK, fl);
2603  	return 0;
2604  }
2605  EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2606  
2607  #ifdef CONFIG_PROC_FS
2608  #include <linux/proc_fs.h>
2609  #include <linux/seq_file.h>
2610  
2611  struct locks_iterator {
2612  	int	li_cpu;
2613  	loff_t	li_pos;
2614  };
2615  
2616  static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2617  			    loff_t id, char *pfx, int repeat)
2618  {
2619  	struct inode *inode = NULL;
2620  	unsigned int fl_pid;
2621  	struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
2622  	int type;
2623  
2624  	fl_pid = locks_translate_pid(fl, proc_pidns);
2625  	/*
2626  	 * If lock owner is dead (and pid is freed) or not visible in current
2627  	 * pidns, zero is shown as a pid value. Check lock info from
2628  	 * init_pid_ns to get saved lock pid value.
2629  	 */
2630  
2631  	if (fl->fl_file != NULL)
2632  		inode = locks_inode(fl->fl_file);
2633  
2634  	seq_printf(f, "%lld: ", id);
2635  
2636  	if (repeat)
2637  		seq_printf(f, "%*s", repeat - 1 + (int)strlen(pfx), pfx);
2638  
2639  	if (IS_POSIX(fl)) {
2640  		if (fl->fl_flags & FL_ACCESS)
2641  			seq_puts(f, "ACCESS");
2642  		else if (IS_OFDLCK(fl))
2643  			seq_puts(f, "OFDLCK");
2644  		else
2645  			seq_puts(f, "POSIX ");
2646  
2647  		seq_printf(f, " %s ",
2648  			     (inode == NULL) ? "*NOINODE*" : "ADVISORY ");
2649  	} else if (IS_FLOCK(fl)) {
2650  		seq_puts(f, "FLOCK  ADVISORY  ");
2651  	} else if (IS_LEASE(fl)) {
2652  		if (fl->fl_flags & FL_DELEG)
2653  			seq_puts(f, "DELEG  ");
2654  		else
2655  			seq_puts(f, "LEASE  ");
2656  
2657  		if (lease_breaking(fl))
2658  			seq_puts(f, "BREAKING  ");
2659  		else if (fl->fl_file)
2660  			seq_puts(f, "ACTIVE    ");
2661  		else
2662  			seq_puts(f, "BREAKER   ");
2663  	} else {
2664  		seq_puts(f, "UNKNOWN UNKNOWN  ");
2665  	}
2666  	type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
2667  
2668  	seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
2669  			     (type == F_RDLCK) ? "READ" : "UNLCK");
2670  	if (inode) {
2671  		/* userspace relies on this representation of dev_t */
2672  		seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
2673  				MAJOR(inode->i_sb->s_dev),
2674  				MINOR(inode->i_sb->s_dev), inode->i_ino);
2675  	} else {
2676  		seq_printf(f, "%d <none>:0 ", fl_pid);
2677  	}
2678  	if (IS_POSIX(fl)) {
2679  		if (fl->fl_end == OFFSET_MAX)
2680  			seq_printf(f, "%Ld EOF\n", fl->fl_start);
2681  		else
2682  			seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end);
2683  	} else {
2684  		seq_puts(f, "0 EOF\n");
2685  	}
2686  }
2687  
2688  static struct file_lock *get_next_blocked_member(struct file_lock *node)
2689  {
2690  	struct file_lock *tmp;
2691  
2692  	/* NULL node or root node */
2693  	if (node == NULL || node->fl_blocker == NULL)
2694  		return NULL;
2695  
2696  	/* Next member in the linked list could be itself */
2697  	tmp = list_next_entry(node, fl_blocked_member);
2698  	if (list_entry_is_head(tmp, &node->fl_blocker->fl_blocked_requests, fl_blocked_member)
2699  		|| tmp == node) {
2700  		return NULL;
2701  	}
2702  
2703  	return tmp;
2704  }
2705  
2706  static int locks_show(struct seq_file *f, void *v)
2707  {
2708  	struct locks_iterator *iter = f->private;
2709  	struct file_lock *cur, *tmp;
2710  	struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
2711  	int level = 0;
2712  
2713  	cur = hlist_entry(v, struct file_lock, fl_link);
2714  
2715  	if (locks_translate_pid(cur, proc_pidns) == 0)
2716  		return 0;
2717  
2718  	/* View this crossed linked list as a binary tree, the first member of fl_blocked_requests
2719  	 * is the left child of current node, the next silibing in fl_blocked_member is the
2720  	 * right child, we can alse get the parent of current node from fl_blocker, so this
2721  	 * question becomes traversal of a binary tree
2722  	 */
2723  	while (cur != NULL) {
2724  		if (level)
2725  			lock_get_status(f, cur, iter->li_pos, "-> ", level);
2726  		else
2727  			lock_get_status(f, cur, iter->li_pos, "", level);
2728  
2729  		if (!list_empty(&cur->fl_blocked_requests)) {
2730  			/* Turn left */
2731  			cur = list_first_entry_or_null(&cur->fl_blocked_requests,
2732  				struct file_lock, fl_blocked_member);
2733  			level++;
2734  		} else {
2735  			/* Turn right */
2736  			tmp = get_next_blocked_member(cur);
2737  			/* Fall back to parent node */
2738  			while (tmp == NULL && cur->fl_blocker != NULL) {
2739  				cur = cur->fl_blocker;
2740  				level--;
2741  				tmp = get_next_blocked_member(cur);
2742  			}
2743  			cur = tmp;
2744  		}
2745  	}
2746  
2747  	return 0;
2748  }
2749  
2750  static void __show_fd_locks(struct seq_file *f,
2751  			struct list_head *head, int *id,
2752  			struct file *filp, struct files_struct *files)
2753  {
2754  	struct file_lock *fl;
2755  
2756  	list_for_each_entry(fl, head, fl_list) {
2757  
2758  		if (filp != fl->fl_file)
2759  			continue;
2760  		if (fl->fl_owner != files &&
2761  		    fl->fl_owner != filp)
2762  			continue;
2763  
2764  		(*id)++;
2765  		seq_puts(f, "lock:\t");
2766  		lock_get_status(f, fl, *id, "", 0);
2767  	}
2768  }
2769  
2770  void show_fd_locks(struct seq_file *f,
2771  		  struct file *filp, struct files_struct *files)
2772  {
2773  	struct inode *inode = locks_inode(filp);
2774  	struct file_lock_context *ctx;
2775  	int id = 0;
2776  
2777  	ctx = smp_load_acquire(&inode->i_flctx);
2778  	if (!ctx)
2779  		return;
2780  
2781  	spin_lock(&ctx->flc_lock);
2782  	__show_fd_locks(f, &ctx->flc_flock, &id, filp, files);
2783  	__show_fd_locks(f, &ctx->flc_posix, &id, filp, files);
2784  	__show_fd_locks(f, &ctx->flc_lease, &id, filp, files);
2785  	spin_unlock(&ctx->flc_lock);
2786  }
2787  
2788  static void *locks_start(struct seq_file *f, loff_t *pos)
2789  	__acquires(&blocked_lock_lock)
2790  {
2791  	struct locks_iterator *iter = f->private;
2792  
2793  	iter->li_pos = *pos + 1;
2794  	percpu_down_write(&file_rwsem);
2795  	spin_lock(&blocked_lock_lock);
2796  	return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos);
2797  }
2798  
2799  static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
2800  {
2801  	struct locks_iterator *iter = f->private;
2802  
2803  	++iter->li_pos;
2804  	return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos);
2805  }
2806  
2807  static void locks_stop(struct seq_file *f, void *v)
2808  	__releases(&blocked_lock_lock)
2809  {
2810  	spin_unlock(&blocked_lock_lock);
2811  	percpu_up_write(&file_rwsem);
2812  }
2813  
2814  static const struct seq_operations locks_seq_operations = {
2815  	.start	= locks_start,
2816  	.next	= locks_next,
2817  	.stop	= locks_stop,
2818  	.show	= locks_show,
2819  };
2820  
2821  static int __init proc_locks_init(void)
2822  {
2823  	proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
2824  			sizeof(struct locks_iterator), NULL);
2825  	return 0;
2826  }
2827  fs_initcall(proc_locks_init);
2828  #endif
2829  
2830  static int __init filelock_init(void)
2831  {
2832  	int i;
2833  
2834  	flctx_cache = kmem_cache_create("file_lock_ctx",
2835  			sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);
2836  
2837  	filelock_cache = kmem_cache_create("file_lock_cache",
2838  			sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
2839  
2840  	for_each_possible_cpu(i) {
2841  		struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
2842  
2843  		spin_lock_init(&fll->lock);
2844  		INIT_HLIST_HEAD(&fll->hlist);
2845  	}
2846  
2847  	lease_notifier_chain_init();
2848  	return 0;
2849  }
2850  core_initcall(filelock_init);
2851