xref: /linux/fs/ocfs2/buffer_head_io.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * io.c
4  *
5  * Buffer cache handling
6  *
7  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/types.h>
12 #include <linux/highmem.h>
13 #include <linux/bio.h>
14 
15 #include <cluster/masklog.h>
16 
17 #include "ocfs2.h"
18 
19 #include "alloc.h"
20 #include "inode.h"
21 #include "journal.h"
22 #include "uptodate.h"
23 #include "buffer_head_io.h"
24 #include "ocfs2_trace.h"
25 
26 /*
27  * Bits on bh->b_state used by ocfs2.
28  *
29  * These MUST be after the JBD2 bits.  Hence, we use BH_JBDPrivateStart.
30  */
31 enum ocfs2_state_bits {
32 	BH_NeedsValidate = BH_JBDPrivateStart,
33 };
34 
35 /* Expand the magic b_state functions */
36 BUFFER_FNS(NeedsValidate, needs_validate);
37 
38 int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
39 		      struct ocfs2_caching_info *ci)
40 {
41 	int ret = 0;
42 
43 	trace_ocfs2_write_block((unsigned long long)bh->b_blocknr, ci);
44 
45 	BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
46 	BUG_ON(buffer_jbd(bh));
47 
48 	/* No need to check for a soft readonly file system here. non
49 	 * journalled writes are only ever done on system files which
50 	 * can get modified during recovery even if read-only. */
51 	if (ocfs2_is_hard_readonly(osb)) {
52 		ret = -EROFS;
53 		mlog_errno(ret);
54 		goto out;
55 	}
56 
57 	ocfs2_metadata_cache_io_lock(ci);
58 
59 	lock_buffer(bh);
60 	set_buffer_uptodate(bh);
61 
62 	/* remove from dirty list before I/O. */
63 	clear_buffer_dirty(bh);
64 
65 	bh_submit(bh, REQ_OP_WRITE, bh_end_write);
66 
67 	wait_on_buffer(bh);
68 
69 	if (buffer_uptodate(bh)) {
70 		ocfs2_set_buffer_uptodate(ci, bh);
71 	} else {
72 		/* We don't need to remove the clustered uptodate
73 		 * information for this bh as it's not marked locally
74 		 * uptodate. */
75 		ret = -EIO;
76 		mlog_errno(ret);
77 	}
78 
79 	ocfs2_metadata_cache_io_unlock(ci);
80 out:
81 	return ret;
82 }
83 
84 /* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
85  * will be easier to handle read failure.
86  */
87 int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
88 			   unsigned int nr, struct buffer_head *bhs[])
89 {
90 	int status = 0;
91 	unsigned int i;
92 	struct buffer_head *bh;
93 	int new_bh = 0;
94 
95 	trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
96 
97 	if (!nr)
98 		goto bail;
99 
100 	/* Don't put buffer head and re-assign it to NULL if it is allocated
101 	 * outside since the caller can't be aware of this alternation!
102 	 */
103 	new_bh = (bhs[0] == NULL);
104 
105 	for (i = 0 ; i < nr ; i++) {
106 		if (bhs[i] == NULL) {
107 			bhs[i] = sb_getblk(osb->sb, block++);
108 			if (bhs[i] == NULL) {
109 				status = -ENOMEM;
110 				mlog_errno(status);
111 				break;
112 			}
113 		}
114 		bh = bhs[i];
115 
116 		if (buffer_jbd(bh)) {
117 			trace_ocfs2_read_blocks_sync_jbd(
118 					(unsigned long long)bh->b_blocknr);
119 			continue;
120 		}
121 
122 		if (buffer_dirty(bh)) {
123 			/* This should probably be a BUG, or
124 			 * at least return an error. */
125 			mlog(ML_ERROR,
126 			     "trying to sync read a dirty "
127 			     "buffer! (blocknr = %llu), skipping\n",
128 			     (unsigned long long)bh->b_blocknr);
129 			continue;
130 		}
131 
132 		lock_buffer(bh);
133 		if (buffer_jbd(bh)) {
134 #ifdef CATCH_BH_JBD_RACES
135 			mlog(ML_ERROR,
136 			     "block %llu had the JBD bit set "
137 			     "while I was in lock_buffer!",
138 			     (unsigned long long)bh->b_blocknr);
139 			BUG();
140 #else
141 			unlock_buffer(bh);
142 			continue;
143 #endif
144 		}
145 
146 		bh_submit(bh, REQ_OP_READ, bh_end_read);
147 	}
148 
149 read_failure:
150 	for (i = nr; i > 0; i--) {
151 		bh = bhs[i - 1];
152 
153 		if (unlikely(status)) {
154 			if (new_bh && bh) {
155 				/* If middle bh fails, let previous bh
156 				 * finish its read and then put it to
157 				 * avoid bh leak
158 				 */
159 				if (!buffer_jbd(bh))
160 					wait_on_buffer(bh);
161 				put_bh(bh);
162 				bhs[i - 1] = NULL;
163 			} else if (bh && buffer_uptodate(bh)) {
164 				clear_buffer_uptodate(bh);
165 			}
166 			continue;
167 		}
168 
169 		/* No need to wait on the buffer if it's managed by JBD. */
170 		if (!buffer_jbd(bh))
171 			wait_on_buffer(bh);
172 
173 		if (!buffer_uptodate(bh)) {
174 			/* Status won't be cleared from here on out,
175 			 * so we can safely record this and loop back
176 			 * to cleanup the other buffers. */
177 			status = -EIO;
178 			goto read_failure;
179 		}
180 	}
181 
182 bail:
183 	return status;
184 }
185 
186 /* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
187  * will be easier to handle read failure.
188  */
189 int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
190 		      struct buffer_head *bhs[], int flags,
191 		      int (*validate)(struct super_block *sb,
192 				      struct buffer_head *bh))
193 {
194 	int status = 0;
195 	int i, ignore_cache = 0;
196 	struct buffer_head *bh;
197 	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
198 	int new_bh = 0;
199 
200 	trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
201 
202 	BUG_ON(!ci);
203 	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
204 	       (flags & OCFS2_BH_IGNORE_CACHE));
205 
206 	if (bhs == NULL) {
207 		status = -EINVAL;
208 		mlog_errno(status);
209 		goto bail;
210 	}
211 
212 	if (nr < 0) {
213 		mlog(ML_ERROR, "asked to read %d blocks!\n", nr);
214 		status = -EINVAL;
215 		mlog_errno(status);
216 		goto bail;
217 	}
218 
219 	if (nr == 0) {
220 		status = 0;
221 		goto bail;
222 	}
223 
224 	/* Don't put buffer head and re-assign it to NULL if it is allocated
225 	 * outside since the caller can't be aware of this alternation!
226 	 */
227 	new_bh = (bhs[0] == NULL);
228 
229 	ocfs2_metadata_cache_io_lock(ci);
230 	for (i = 0 ; i < nr ; i++) {
231 		if (bhs[i] == NULL) {
232 			bhs[i] = sb_getblk(sb, block++);
233 			if (bhs[i] == NULL) {
234 				status = -ENOMEM;
235 				mlog_errno(status);
236 				/* Don't forget to put previous bh! */
237 				break;
238 			}
239 		}
240 		bh = bhs[i];
241 		ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE);
242 
243 		/* There are three read-ahead cases here which we need to
244 		 * be concerned with. All three assume a buffer has
245 		 * previously been submitted with OCFS2_BH_READAHEAD
246 		 * and it hasn't yet completed I/O.
247 		 *
248 		 * 1) The current request is sync to disk. This rarely
249 		 *    happens these days, and never when performance
250 		 *    matters - the code can just wait on the buffer
251 		 *    lock and re-submit.
252 		 *
253 		 * 2) The current request is cached, but not
254 		 *    readahead. ocfs2_buffer_uptodate() will return
255 		 *    false anyway, so we'll wind up waiting on the
256 		 *    buffer lock to do I/O. We re-check the request
257 		 *    with after getting the lock to avoid a re-submit.
258 		 *
259 		 * 3) The current request is readahead (and so must
260 		 *    also be a caching one). We short circuit if the
261 		 *    buffer is locked (under I/O) and if it's in the
262 		 *    uptodate cache. The re-check from #2 catches the
263 		 *    case that the previous read-ahead completes just
264 		 *    before our is-it-in-flight check.
265 		 */
266 
267 		if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
268 			trace_ocfs2_read_blocks_from_disk(
269 			     (unsigned long long)bh->b_blocknr,
270 			     (unsigned long long)ocfs2_metadata_cache_owner(ci));
271 			/* We're using ignore_cache here to say
272 			 * "go to disk" */
273 			ignore_cache = 1;
274 		}
275 
276 		trace_ocfs2_read_blocks_bh((unsigned long long)bh->b_blocknr,
277 			ignore_cache, buffer_jbd(bh), buffer_dirty(bh));
278 
279 		if (buffer_jbd(bh)) {
280 			continue;
281 		}
282 
283 		if (ignore_cache) {
284 			if (buffer_dirty(bh)) {
285 				/* This should probably be a BUG, or
286 				 * at least return an error. */
287 				continue;
288 			}
289 
290 			/* A read-ahead request was made - if the
291 			 * buffer is already under read-ahead from a
292 			 * previously submitted request than we are
293 			 * done here. */
294 			if ((flags & OCFS2_BH_READAHEAD)
295 			    && ocfs2_buffer_read_ahead(ci, bh))
296 				continue;
297 
298 			lock_buffer(bh);
299 			if (buffer_jbd(bh)) {
300 #ifdef CATCH_BH_JBD_RACES
301 				mlog(ML_ERROR, "block %llu had the JBD bit set "
302 					       "while I was in lock_buffer!",
303 				     (unsigned long long)bh->b_blocknr);
304 				BUG();
305 #else
306 				unlock_buffer(bh);
307 				continue;
308 #endif
309 			}
310 
311 			/* Re-check ocfs2_buffer_uptodate() as a
312 			 * previously read-ahead buffer may have
313 			 * completed I/O while we were waiting for the
314 			 * buffer lock. */
315 			if (!(flags & OCFS2_BH_IGNORE_CACHE)
316 			    && !(flags & OCFS2_BH_READAHEAD)
317 			    && ocfs2_buffer_uptodate(ci, bh)) {
318 				unlock_buffer(bh);
319 				continue;
320 			}
321 
322 			if (validate)
323 				set_buffer_needs_validate(bh);
324 			bh_submit(bh, REQ_OP_READ, bh_end_read);
325 			continue;
326 		}
327 	}
328 
329 read_failure:
330 	for (i = (nr - 1); i >= 0; i--) {
331 		bh = bhs[i];
332 
333 		if (!(flags & OCFS2_BH_READAHEAD)) {
334 			if (unlikely(status)) {
335 				/* Clear the buffers on error including those
336 				 * ever succeeded in reading
337 				 */
338 				if (new_bh && bh) {
339 					/* If middle bh fails, let previous bh
340 					 * finish its read and then put it to
341 					 * avoid bh leak
342 					 */
343 					if (!buffer_jbd(bh))
344 						wait_on_buffer(bh);
345 					put_bh(bh);
346 					bhs[i] = NULL;
347 				}
348 				continue;
349 			}
350 			/* We know this can't have changed as we hold the
351 			 * owner sem. Avoid doing any work on the bh if the
352 			 * journal has it. */
353 			if (!buffer_jbd(bh))
354 				wait_on_buffer(bh);
355 
356 			if (!buffer_uptodate(bh)) {
357 				/* Status won't be cleared from here on out,
358 				 * so we can safely record this and loop back
359 				 * to cleanup the other buffers. Don't need to
360 				 * remove the clustered uptodate information
361 				 * for this bh as it's not marked locally
362 				 * uptodate. */
363 				status = -EIO;
364 				clear_buffer_needs_validate(bh);
365 				goto read_failure;
366 			}
367 
368 			if (buffer_needs_validate(bh)) {
369 				/* We never set NeedsValidate if the
370 				 * buffer was held by the journal, so
371 				 * that better not have changed */
372 				BUG_ON(buffer_jbd(bh));
373 				clear_buffer_needs_validate(bh);
374 				status = validate(sb, bh);
375 				if (status) {
376 					if (buffer_uptodate(bh))
377 						clear_buffer_uptodate(bh);
378 					goto read_failure;
379 				}
380 			}
381 		}
382 
383 		/* Always set the buffer in the cache, even if it was
384 		 * a forced read, or read-ahead which hasn't yet
385 		 * completed. */
386 		if (bh)
387 			ocfs2_set_buffer_uptodate(ci, bh);
388 	}
389 	ocfs2_metadata_cache_io_unlock(ci);
390 
391 	trace_ocfs2_read_blocks_end((unsigned long long)block, nr,
392 				    flags, ignore_cache);
393 
394 bail:
395 
396 	return status;
397 }
398 
399 /* Check whether the blkno is the super block or one of the backups. */
400 static void ocfs2_check_super_or_backup(struct super_block *sb,
401 					sector_t blkno)
402 {
403 	int i;
404 	u64 backup_blkno;
405 
406 	if (blkno == OCFS2_SUPER_BLOCK_BLKNO)
407 		return;
408 
409 	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
410 		backup_blkno = ocfs2_backup_super_blkno(sb, i);
411 		if (backup_blkno == blkno)
412 			return;
413 	}
414 
415 	BUG();
416 }
417 
418 /*
419  * Write super block and backups doesn't need to collaborate with journal,
420  * so we don't need to lock ip_io_mutex and ci doesn't need to bea passed
421  * into this function.
422  */
423 int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
424 				struct buffer_head *bh)
425 {
426 	int ret = 0;
427 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
428 
429 	BUG_ON(buffer_jbd(bh));
430 	ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);
431 
432 	if (unlikely(ocfs2_emergency_state(osb))) {
433 		ret = -EROFS;
434 		mlog_errno(ret);
435 		goto out;
436 	}
437 
438 	lock_buffer(bh);
439 	set_buffer_uptodate(bh);
440 
441 	/* remove from dirty list before I/O. */
442 	clear_buffer_dirty(bh);
443 
444 	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
445 	bh_submit(bh, REQ_OP_WRITE, bh_end_write);
446 
447 	wait_on_buffer(bh);
448 
449 	if (!buffer_uptodate(bh)) {
450 		ret = -EIO;
451 		mlog_errno(ret);
452 	}
453 
454 out:
455 	return ret;
456 }
457