xref: /linux/fs/ceph/io.c (revision e3966940559d52aa1800a008dcfeec218dd31f88)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2016 Trond Myklebust
4  * Copyright (c) 2019 Jeff Layton
5  *
6  * I/O and data path helper functionality.
7  *
8  * Heavily borrowed from equivalent code in fs/nfs/io.c
9  */
10 
11 #include <linux/ceph/ceph_debug.h>
12 
13 #include <linux/types.h>
14 #include <linux/kernel.h>
15 #include <linux/rwsem.h>
16 #include <linux/fs.h>
17 
18 #include "super.h"
19 #include "io.h"
20 
21 /* Call with exclusively locked inode->i_rwsem */
22 static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
23 {
24 	bool is_odirect;
25 
26 	lockdep_assert_held_write(&inode->i_rwsem);
27 
28 	spin_lock(&ci->i_ceph_lock);
29 	/* ensure that bit state is consistent */
30 	smp_mb__before_atomic();
31 	is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
32 	if (is_odirect) {
33 		clear_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
34 		/* ensure modified bit is visible */
35 		smp_mb__after_atomic();
36 	}
37 	spin_unlock(&ci->i_ceph_lock);
38 
39 	if (is_odirect)
40 		inode_dio_wait(inode);
41 }
42 
43 /**
44  * ceph_start_io_read - declare the file is being used for buffered reads
45  * @inode: file inode
46  *
47  * Declare that a buffered read operation is about to start, and ensure
48  * that we block all direct I/O.
49  * On exit, the function ensures that the CEPH_I_ODIRECT flag is unset,
50  * and holds a shared lock on inode->i_rwsem to ensure that the flag
51  * cannot be changed.
52  * In practice, this means that buffered read operations are allowed to
53  * execute in parallel, thanks to the shared lock, whereas direct I/O
54  * operations need to wait to grab an exclusive lock in order to set
55  * CEPH_I_ODIRECT.
56  * Note that buffered writes and truncates both take a write lock on
57  * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
58  */
59 int ceph_start_io_read(struct inode *inode)
60 {
61 	struct ceph_inode_info *ci = ceph_inode(inode);
62 	bool is_odirect;
63 	int err;
64 
65 	/* Be an optimist! */
66 	err = down_read_killable(&inode->i_rwsem);
67 	if (err)
68 		return err;
69 
70 	spin_lock(&ci->i_ceph_lock);
71 	/* ensure that bit state is consistent */
72 	smp_mb__before_atomic();
73 	is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
74 	spin_unlock(&ci->i_ceph_lock);
75 	if (!is_odirect)
76 		return 0;
77 	up_read(&inode->i_rwsem);
78 
79 	/* Slow path.... */
80 	err = down_write_killable(&inode->i_rwsem);
81 	if (err)
82 		return err;
83 
84 	ceph_block_o_direct(ci, inode);
85 	downgrade_write(&inode->i_rwsem);
86 
87 	return 0;
88 }
89 
90 /**
91  * ceph_end_io_read - declare that the buffered read operation is done
92  * @inode: file inode
93  *
94  * Declare that a buffered read operation is done, and release the shared
95  * lock on inode->i_rwsem.
96  */
97 void
98 ceph_end_io_read(struct inode *inode)
99 {
100 	up_read(&inode->i_rwsem);
101 }
102 
103 /**
104  * ceph_start_io_write - declare the file is being used for buffered writes
105  * @inode: file inode
106  *
107  * Declare that a buffered write operation is about to start, and ensure
108  * that we block all direct I/O.
109  */
110 int ceph_start_io_write(struct inode *inode)
111 {
112 	int err = down_write_killable(&inode->i_rwsem);
113 	if (!err)
114 		ceph_block_o_direct(ceph_inode(inode), inode);
115 	return err;
116 }
117 
118 /**
119  * ceph_end_io_write - declare that the buffered write operation is done
120  * @inode: file inode
121  *
122  * Declare that a buffered write operation is done, and release the
123  * lock on inode->i_rwsem.
124  */
125 void
126 ceph_end_io_write(struct inode *inode)
127 {
128 	up_write(&inode->i_rwsem);
129 }
130 
131 /* Call with exclusively locked inode->i_rwsem */
132 static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
133 {
134 	bool is_odirect;
135 
136 	lockdep_assert_held_write(&inode->i_rwsem);
137 
138 	spin_lock(&ci->i_ceph_lock);
139 	/* ensure that bit state is consistent */
140 	smp_mb__before_atomic();
141 	is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
142 	if (!is_odirect) {
143 		set_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
144 		/* ensure modified bit is visible */
145 		smp_mb__after_atomic();
146 	}
147 	spin_unlock(&ci->i_ceph_lock);
148 
149 	if (!is_odirect) {
150 		/* FIXME: unmap_mapping_range? */
151 		filemap_write_and_wait(inode->i_mapping);
152 	}
153 }
154 
155 /**
156  * ceph_start_io_direct - declare the file is being used for direct i/o
157  * @inode: file inode
158  *
159  * Declare that a direct I/O operation is about to start, and ensure
160  * that we block all buffered I/O.
161  * On exit, the function ensures that the CEPH_I_ODIRECT flag is set,
162  * and holds a shared lock on inode->i_rwsem to ensure that the flag
163  * cannot be changed.
164  * In practice, this means that direct I/O operations are allowed to
165  * execute in parallel, thanks to the shared lock, whereas buffered I/O
166  * operations need to wait to grab an exclusive lock in order to clear
167  * CEPH_I_ODIRECT.
168  * Note that buffered writes and truncates both take a write lock on
169  * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
170  */
171 int ceph_start_io_direct(struct inode *inode)
172 {
173 	struct ceph_inode_info *ci = ceph_inode(inode);
174 	bool is_odirect;
175 	int err;
176 
177 	/* Be an optimist! */
178 	err = down_read_killable(&inode->i_rwsem);
179 	if (err)
180 		return err;
181 
182 	spin_lock(&ci->i_ceph_lock);
183 	/* ensure that bit state is consistent */
184 	smp_mb__before_atomic();
185 	is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
186 	spin_unlock(&ci->i_ceph_lock);
187 	if (is_odirect)
188 		return 0;
189 	up_read(&inode->i_rwsem);
190 
191 	/* Slow path.... */
192 	err = down_write_killable(&inode->i_rwsem);
193 	if (err)
194 		return err;
195 
196 	ceph_block_buffered(ci, inode);
197 	downgrade_write(&inode->i_rwsem);
198 
199 	return 0;
200 }
201 
202 /**
203  * ceph_end_io_direct - declare that the direct i/o operation is done
204  * @inode: file inode
205  *
206  * Declare that a direct I/O operation is done, and release the shared
207  * lock on inode->i_rwsem.
208  */
209 void
210 ceph_end_io_direct(struct inode *inode)
211 {
212 	up_read(&inode->i_rwsem);
213 }
214