// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2016 Trond Myklebust
 * Copyright (c) 2019 Jeff Layton
 *
 * I/O and data path helper functionality.
 *
 * Heavily borrowed from equivalent code in fs/nfs/io.c
 */
10
11 #include <linux/ceph/ceph_debug.h>
12
13 #include <linux/types.h>
14 #include <linux/kernel.h>
15 #include <linux/rwsem.h>
16 #include <linux/fs.h>
17
18 #include "super.h"
19 #include "io.h"
20
21 /* Call with exclusively locked inode->i_rwsem */
ceph_block_o_direct(struct ceph_inode_info * ci,struct inode * inode)22 static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
23 {
24 bool is_odirect;
25
26 lockdep_assert_held_write(&inode->i_rwsem);
27
28 spin_lock(&ci->i_ceph_lock);
29 /* ensure that bit state is consistent */
30 smp_mb__before_atomic();
31 is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
32 if (is_odirect) {
33 clear_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
34 /* ensure modified bit is visible */
35 smp_mb__after_atomic();
36 }
37 spin_unlock(&ci->i_ceph_lock);
38
39 if (is_odirect)
40 inode_dio_wait(inode);
41 }
42
43 /**
44 * ceph_start_io_read - declare the file is being used for buffered reads
45 * @inode: file inode
46 *
47 * Declare that a buffered read operation is about to start, and ensure
48 * that we block all direct I/O.
49 * On exit, the function ensures that the CEPH_I_ODIRECT flag is unset,
50 * and holds a shared lock on inode->i_rwsem to ensure that the flag
51 * cannot be changed.
52 * In practice, this means that buffered read operations are allowed to
53 * execute in parallel, thanks to the shared lock, whereas direct I/O
54 * operations need to wait to grab an exclusive lock in order to set
55 * CEPH_I_ODIRECT.
56 * Note that buffered writes and truncates both take a write lock on
57 * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
58 */
ceph_start_io_read(struct inode * inode)59 int ceph_start_io_read(struct inode *inode)
60 {
61 struct ceph_inode_info *ci = ceph_inode(inode);
62 bool is_odirect;
63 int err;
64
65 /* Be an optimist! */
66 err = down_read_killable(&inode->i_rwsem);
67 if (err)
68 return err;
69
70 spin_lock(&ci->i_ceph_lock);
71 /* ensure that bit state is consistent */
72 smp_mb__before_atomic();
73 is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
74 spin_unlock(&ci->i_ceph_lock);
75 if (!is_odirect)
76 return 0;
77 up_read(&inode->i_rwsem);
78
79 /* Slow path.... */
80 err = down_write_killable(&inode->i_rwsem);
81 if (err)
82 return err;
83
84 ceph_block_o_direct(ci, inode);
85 downgrade_write(&inode->i_rwsem);
86
87 return 0;
88 }
89
90 /**
91 * ceph_end_io_read - declare that the buffered read operation is done
92 * @inode: file inode
93 *
94 * Declare that a buffered read operation is done, and release the shared
95 * lock on inode->i_rwsem.
96 */
97 void
ceph_end_io_read(struct inode * inode)98 ceph_end_io_read(struct inode *inode)
99 {
100 up_read(&inode->i_rwsem);
101 }
102
103 /**
104 * ceph_start_io_write - declare the file is being used for buffered writes
105 * @inode: file inode
106 *
107 * Declare that a buffered write operation is about to start, and ensure
108 * that we block all direct I/O.
109 */
ceph_start_io_write(struct inode * inode)110 int ceph_start_io_write(struct inode *inode)
111 {
112 int err = down_write_killable(&inode->i_rwsem);
113 if (!err)
114 ceph_block_o_direct(ceph_inode(inode), inode);
115 return err;
116 }
117
118 /**
119 * ceph_end_io_write - declare that the buffered write operation is done
120 * @inode: file inode
121 *
122 * Declare that a buffered write operation is done, and release the
123 * lock on inode->i_rwsem.
124 */
125 void
ceph_end_io_write(struct inode * inode)126 ceph_end_io_write(struct inode *inode)
127 {
128 up_write(&inode->i_rwsem);
129 }
130
131 /* Call with exclusively locked inode->i_rwsem */
ceph_block_buffered(struct ceph_inode_info * ci,struct inode * inode)132 static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
133 {
134 bool is_odirect;
135
136 lockdep_assert_held_write(&inode->i_rwsem);
137
138 spin_lock(&ci->i_ceph_lock);
139 /* ensure that bit state is consistent */
140 smp_mb__before_atomic();
141 is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
142 if (!is_odirect) {
143 set_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
144 /* ensure modified bit is visible */
145 smp_mb__after_atomic();
146 }
147 spin_unlock(&ci->i_ceph_lock);
148
149 if (!is_odirect) {
150 /* FIXME: unmap_mapping_range? */
151 filemap_write_and_wait(inode->i_mapping);
152 }
153 }
154
155 /**
156 * ceph_start_io_direct - declare the file is being used for direct i/o
157 * @inode: file inode
158 *
159 * Declare that a direct I/O operation is about to start, and ensure
160 * that we block all buffered I/O.
161 * On exit, the function ensures that the CEPH_I_ODIRECT flag is set,
162 * and holds a shared lock on inode->i_rwsem to ensure that the flag
163 * cannot be changed.
164 * In practice, this means that direct I/O operations are allowed to
165 * execute in parallel, thanks to the shared lock, whereas buffered I/O
166 * operations need to wait to grab an exclusive lock in order to clear
167 * CEPH_I_ODIRECT.
168 * Note that buffered writes and truncates both take a write lock on
169 * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
170 */
ceph_start_io_direct(struct inode * inode)171 int ceph_start_io_direct(struct inode *inode)
172 {
173 struct ceph_inode_info *ci = ceph_inode(inode);
174 bool is_odirect;
175 int err;
176
177 /* Be an optimist! */
178 err = down_read_killable(&inode->i_rwsem);
179 if (err)
180 return err;
181
182 spin_lock(&ci->i_ceph_lock);
183 /* ensure that bit state is consistent */
184 smp_mb__before_atomic();
185 is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
186 spin_unlock(&ci->i_ceph_lock);
187 if (is_odirect)
188 return 0;
189 up_read(&inode->i_rwsem);
190
191 /* Slow path.... */
192 err = down_write_killable(&inode->i_rwsem);
193 if (err)
194 return err;
195
196 ceph_block_buffered(ci, inode);
197 downgrade_write(&inode->i_rwsem);
198
199 return 0;
200 }
201
202 /**
203 * ceph_end_io_direct - declare that the direct i/o operation is done
204 * @inode: file inode
205 *
206 * Declare that a direct I/O operation is done, and release the shared
207 * lock on inode->i_rwsem.
208 */
209 void
ceph_end_io_direct(struct inode * inode)210 ceph_end_io_direct(struct inode *inode)
211 {
212 up_read(&inode->i_rwsem);
213 }
214