xref: /linux/fs/xfs/xfs_iomap.c (revision f3d9478b2ce468c3115b02ecae7e975990697f15)
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_btree.h"
40 #include "xfs_bmap.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_itable.h"
44 #include "xfs_rw.h"
45 #include "xfs_acl.h"
46 #include "xfs_cap.h"
47 #include "xfs_mac.h"
48 #include "xfs_attr.h"
49 #include "xfs_buf_item.h"
50 #include "xfs_trans_space.h"
51 #include "xfs_utils.h"
52 #include "xfs_iomap.h"
53 
54 #if defined(XFS_RW_TRACE)
55 void
56 xfs_iomap_enter_trace(
57 	int		tag,
58 	xfs_iocore_t	*io,
59 	xfs_off_t	offset,
60 	ssize_t		count)
61 {
62 	xfs_inode_t	*ip = XFS_IO_INODE(io);
63 
64 	if (!ip->i_rwtrace)
65 		return;
66 
67 	ktrace_enter(ip->i_rwtrace,
68 		(void *)((unsigned long)tag),
69 		(void *)ip,
70 		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
71 		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
72 		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
73 		(void *)((unsigned long)(offset & 0xffffffff)),
74 		(void *)((unsigned long)count),
75 		(void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
76 		(void *)((unsigned long)(io->io_new_size & 0xffffffff)),
77 		(void *)((unsigned long)current_pid()),
78 		(void *)NULL,
79 		(void *)NULL,
80 		(void *)NULL,
81 		(void *)NULL,
82 		(void *)NULL,
83 		(void *)NULL);
84 }
85 
86 void
87 xfs_iomap_map_trace(
88 	int		tag,
89 	xfs_iocore_t	*io,
90 	xfs_off_t	offset,
91 	ssize_t		count,
92 	xfs_iomap_t	*iomapp,
93 	xfs_bmbt_irec_t	*imapp,
94 	int		flags)
95 {
96 	xfs_inode_t	*ip = XFS_IO_INODE(io);
97 
98 	if (!ip->i_rwtrace)
99 		return;
100 
101 	ktrace_enter(ip->i_rwtrace,
102 		(void *)((unsigned long)tag),
103 		(void *)ip,
104 		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
105 		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
106 		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
107 		(void *)((unsigned long)(offset & 0xffffffff)),
108 		(void *)((unsigned long)count),
109 		(void *)((unsigned long)flags),
110 		(void *)((unsigned long)((iomapp->iomap_offset >> 32) & 0xffffffff)),
111 		(void *)((unsigned long)(iomapp->iomap_offset & 0xffffffff)),
112 		(void *)((unsigned long)(iomapp->iomap_delta)),
113 		(void *)((unsigned long)(iomapp->iomap_bsize)),
114 		(void *)((unsigned long)(iomapp->iomap_bn)),
115 		(void *)(__psint_t)(imapp->br_startoff),
116 		(void *)((unsigned long)(imapp->br_blockcount)),
117 		(void *)(__psint_t)(imapp->br_startblock));
118 }
119 #else
120 #define xfs_iomap_enter_trace(tag, io, offset, count)
121 #define xfs_iomap_map_trace(tag, io, offset, count, iomapp, imapp, flags)
122 #endif
123 
/*
 * Round a byte offset down to a multiple of 2^m_writeio_log bytes.
 * The mount argument is parenthesized so that any pointer expression
 * (e.g. "&mount" or "ip->i_mount") can be passed safely.
 */
#define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> (mp)->m_writeio_log) \
						<< (mp)->m_writeio_log)
/* Size of the extent array used by xfs_iomap_write_allocate(). */
#define XFS_STRAT_WRITE_IMAPS	2
/* Size of the extent array used by xfs_iomap_write_delay(). */
#define XFS_WRITE_IMAPS		XFS_BMAP_MAX_NMAP
128 
129 STATIC int
130 xfs_imap_to_bmap(
131 	xfs_iocore_t	*io,
132 	xfs_off_t	offset,
133 	xfs_bmbt_irec_t *imap,
134 	xfs_iomap_t	*iomapp,
135 	int		imaps,			/* Number of imap entries */
136 	int		iomaps,			/* Number of iomap entries */
137 	int		flags)
138 {
139 	xfs_mount_t	*mp;
140 	xfs_fsize_t	nisize;
141 	int		pbm;
142 	xfs_fsblock_t	start_block;
143 
144 	mp = io->io_mount;
145 	nisize = XFS_SIZE(mp, io);
146 	if (io->io_new_size > nisize)
147 		nisize = io->io_new_size;
148 
149 	for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) {
150 		iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
151 		iomapp->iomap_delta = offset - iomapp->iomap_offset;
152 		iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
153 		iomapp->iomap_flags = flags;
154 
155 		if (io->io_flags & XFS_IOCORE_RT) {
156 			iomapp->iomap_flags |= IOMAP_REALTIME;
157 			iomapp->iomap_target = mp->m_rtdev_targp;
158 		} else {
159 			iomapp->iomap_target = mp->m_ddev_targp;
160 		}
161 		start_block = imap->br_startblock;
162 		if (start_block == HOLESTARTBLOCK) {
163 			iomapp->iomap_bn = IOMAP_DADDR_NULL;
164 			iomapp->iomap_flags |= IOMAP_HOLE;
165 		} else if (start_block == DELAYSTARTBLOCK) {
166 			iomapp->iomap_bn = IOMAP_DADDR_NULL;
167 			iomapp->iomap_flags |= IOMAP_DELAY;
168 		} else {
169 			iomapp->iomap_bn = XFS_FSB_TO_DB_IO(io, start_block);
170 			if (ISUNWRITTEN(imap))
171 				iomapp->iomap_flags |= IOMAP_UNWRITTEN;
172 		}
173 
174 		if ((iomapp->iomap_offset + iomapp->iomap_bsize) >= nisize) {
175 			iomapp->iomap_flags |= IOMAP_EOF;
176 		}
177 
178 		offset += iomapp->iomap_bsize - iomapp->iomap_delta;
179 	}
180 	return pbm;	/* Return the number filled */
181 }
182 
183 int
184 xfs_iomap(
185 	xfs_iocore_t	*io,
186 	xfs_off_t	offset,
187 	ssize_t		count,
188 	int		flags,
189 	xfs_iomap_t	*iomapp,
190 	int		*niomaps)
191 {
192 	xfs_mount_t	*mp = io->io_mount;
193 	xfs_fileoff_t	offset_fsb, end_fsb;
194 	int		error = 0;
195 	int		lockmode = 0;
196 	xfs_bmbt_irec_t	imap;
197 	int		nimaps = 1;
198 	int		bmapi_flags = 0;
199 	int		iomap_flags = 0;
200 
201 	if (XFS_FORCED_SHUTDOWN(mp))
202 		return XFS_ERROR(EIO);
203 
204 	switch (flags &
205 		(BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE |
206 		 BMAPI_UNWRITTEN | BMAPI_DEVICE)) {
207 	case BMAPI_READ:
208 		xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count);
209 		lockmode = XFS_LCK_MAP_SHARED(mp, io);
210 		bmapi_flags = XFS_BMAPI_ENTIRE;
211 		break;
212 	case BMAPI_WRITE:
213 		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count);
214 		lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
215 		if (flags & BMAPI_IGNSTATE)
216 			bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
217 		XFS_ILOCK(mp, io, lockmode);
218 		break;
219 	case BMAPI_ALLOCATE:
220 		xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, io, offset, count);
221 		lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
222 		bmapi_flags = XFS_BMAPI_ENTIRE;
223 		/* Attempt non-blocking lock */
224 		if (flags & BMAPI_TRYLOCK) {
225 			if (!XFS_ILOCK_NOWAIT(mp, io, lockmode))
226 				return XFS_ERROR(EAGAIN);
227 		} else {
228 			XFS_ILOCK(mp, io, lockmode);
229 		}
230 		break;
231 	case BMAPI_UNWRITTEN:
232 		goto phase2;
233 	case BMAPI_DEVICE:
234 		lockmode = XFS_LCK_MAP_SHARED(mp, io);
235 		iomapp->iomap_target = io->io_flags & XFS_IOCORE_RT ?
236 			mp->m_rtdev_targp : mp->m_ddev_targp;
237 		error = 0;
238 		*niomaps = 1;
239 		goto out;
240 	default:
241 		BUG();
242 	}
243 
244 	ASSERT(offset <= mp->m_maxioffset);
245 	if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
246 		count = mp->m_maxioffset - offset;
247 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
248 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
249 
250 	error = XFS_BMAPI(mp, NULL, io, offset_fsb,
251 			(xfs_filblks_t)(end_fsb - offset_fsb),
252 			bmapi_flags,  NULL, 0, &imap,
253 			&nimaps, NULL, NULL);
254 
255 	if (error)
256 		goto out;
257 
258 phase2:
259 	switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE|BMAPI_UNWRITTEN)) {
260 	case BMAPI_WRITE:
261 		/* If we found an extent, return it */
262 		if (nimaps &&
263 		    (imap.br_startblock != HOLESTARTBLOCK) &&
264 		    (imap.br_startblock != DELAYSTARTBLOCK)) {
265 			xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
266 					offset, count, iomapp, &imap, flags);
267 			break;
268 		}
269 
270 		if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
271 			error = XFS_IOMAP_WRITE_DIRECT(mp, io, offset,
272 					count, flags, &imap, &nimaps, nimaps);
273 		} else {
274 			error = XFS_IOMAP_WRITE_DELAY(mp, io, offset, count,
275 					flags, &imap, &nimaps);
276 		}
277 		if (!error) {
278 			xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, io,
279 					offset, count, iomapp, &imap, flags);
280 		}
281 		iomap_flags = IOMAP_NEW;
282 		break;
283 	case BMAPI_ALLOCATE:
284 		/* If we found an extent, return it */
285 		XFS_IUNLOCK(mp, io, lockmode);
286 		lockmode = 0;
287 
288 		if (nimaps && !ISNULLSTARTBLOCK(imap.br_startblock)) {
289 			xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
290 					offset, count, iomapp, &imap, flags);
291 			break;
292 		}
293 
294 		error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count,
295 						 &imap, &nimaps);
296 		break;
297 	case BMAPI_UNWRITTEN:
298 		lockmode = 0;
299 		error = XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count);
300 		nimaps = 0;
301 		break;
302 	}
303 
304 	if (nimaps) {
305 		*niomaps = xfs_imap_to_bmap(io, offset, &imap,
306 					    iomapp, nimaps, *niomaps, iomap_flags);
307 	} else if (niomaps) {
308 		*niomaps = 0;
309 	}
310 
311 out:
312 	if (lockmode)
313 		XFS_IUNLOCK(mp, io, lockmode);
314 	return XFS_ERROR(error);
315 }
316 
317 STATIC int
318 xfs_iomap_eof_align_last_fsb(
319 	xfs_mount_t	*mp,
320 	xfs_iocore_t	*io,
321 	xfs_fsize_t	isize,
322 	xfs_extlen_t	extsize,
323 	xfs_fileoff_t	*last_fsb)
324 {
325 	xfs_fileoff_t	new_last_fsb = 0;
326 	xfs_extlen_t	align;
327 	int		eof, error;
328 
329 	if (io->io_flags & XFS_IOCORE_RT)
330 		;
331 	/*
332 	 * If mounted with the "-o swalloc" option, roundup the allocation
333 	 * request to a stripe width boundary if the file size is >=
334 	 * stripe width and we are allocating past the allocation eof.
335 	 */
336 	else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
337 	        (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
338 		new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
339 	/*
340 	 * Roundup the allocation request to a stripe unit (m_dalign) boundary
341 	 * if the file size is >= stripe unit size, and we are allocating past
342 	 * the allocation eof.
343 	 */
344 	else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
345 		new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
346 
347 	/*
348 	 * Always round up the allocation request to an extent boundary
349 	 * (when file on a real-time subvolume or has di_extsize hint).
350 	 */
351 	if (extsize) {
352 		if (new_last_fsb)
353 			align = roundup_64(new_last_fsb, extsize);
354 		else
355 			align = extsize;
356 		new_last_fsb = roundup_64(*last_fsb, align);
357 	}
358 
359 	if (new_last_fsb) {
360 		error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
361 		if (error)
362 			return error;
363 		if (eof)
364 			*last_fsb = new_last_fsb;
365 	}
366 	return 0;
367 }
368 
369 STATIC int
370 xfs_flush_space(
371 	xfs_inode_t	*ip,
372 	int		*fsynced,
373 	int		*ioflags)
374 {
375 	switch (*fsynced) {
376 	case 0:
377 		if (ip->i_delayed_blks) {
378 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
379 			xfs_flush_inode(ip);
380 			xfs_ilock(ip, XFS_ILOCK_EXCL);
381 			*fsynced = 1;
382 		} else {
383 			*ioflags |= BMAPI_SYNC;
384 			*fsynced = 2;
385 		}
386 		return 0;
387 	case 1:
388 		*fsynced = 2;
389 		*ioflags |= BMAPI_SYNC;
390 		return 0;
391 	case 2:
392 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
393 		xfs_flush_device(ip);
394 		xfs_ilock(ip, XFS_ILOCK_EXCL);
395 		*fsynced = 3;
396 		return 0;
397 	}
398 	return 1;
399 }
400 
401 int
402 xfs_iomap_write_direct(
403 	xfs_inode_t	*ip,
404 	xfs_off_t	offset,
405 	size_t		count,
406 	int		flags,
407 	xfs_bmbt_irec_t *ret_imap,
408 	int		*nmaps,
409 	int		found)
410 {
411 	xfs_mount_t	*mp = ip->i_mount;
412 	xfs_iocore_t	*io = &ip->i_iocore;
413 	xfs_fileoff_t	offset_fsb;
414 	xfs_fileoff_t	last_fsb;
415 	xfs_filblks_t	count_fsb, resaligned;
416 	xfs_fsblock_t	firstfsb;
417 	xfs_extlen_t	extsz, temp;
418 	xfs_fsize_t	isize;
419 	int		nimaps;
420 	int		bmapi_flag;
421 	int		quota_flag;
422 	int		rt;
423 	xfs_trans_t	*tp;
424 	xfs_bmbt_irec_t imap;
425 	xfs_bmap_free_t free_list;
426 	uint		qblocks, resblks, resrtextents;
427 	int		committed;
428 	int		error;
429 
430 	/*
431 	 * Make sure that the dquots are there. This doesn't hold
432 	 * the ilock across a disk read.
433 	 */
434 	error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED);
435 	if (error)
436 		return XFS_ERROR(error);
437 
438 	rt = XFS_IS_REALTIME_INODE(ip);
439 	if (unlikely(rt)) {
440 		if (!(extsz = ip->i_d.di_extsize))
441 			extsz = mp->m_sb.sb_rextsize;
442 	} else {
443 		extsz = ip->i_d.di_extsize;
444 	}
445 
446 	isize = ip->i_d.di_size;
447 	if (io->io_new_size > isize)
448 		isize = io->io_new_size;
449 
450   	offset_fsb = XFS_B_TO_FSBT(mp, offset);
451   	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
452 	if ((offset + count) > isize) {
453 		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
454 							&last_fsb);
455 		if (error)
456 			goto error_out;
457 	} else {
458 		if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
459 			last_fsb = MIN(last_fsb, (xfs_fileoff_t)
460 					ret_imap->br_blockcount +
461 					ret_imap->br_startoff);
462 	}
463 	count_fsb = last_fsb - offset_fsb;
464 	ASSERT(count_fsb > 0);
465 
466 	resaligned = count_fsb;
467 	if (unlikely(extsz)) {
468 		if ((temp = do_mod(offset_fsb, extsz)))
469 			resaligned += temp;
470 		if ((temp = do_mod(resaligned, extsz)))
471 			resaligned += extsz - temp;
472 	}
473 
474 	if (unlikely(rt)) {
475 		resrtextents = qblocks = resaligned;
476 		resrtextents /= mp->m_sb.sb_rextsize;
477   		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
478   		quota_flag = XFS_QMOPT_RES_RTBLKS;
479   	} else {
480   		resrtextents = 0;
481 		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
482   		quota_flag = XFS_QMOPT_RES_REGBLKS;
483   	}
484 
485 	/*
486 	 * Allocate and setup the transaction
487 	 */
488 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
489 	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
490 	error = xfs_trans_reserve(tp, resblks,
491 			XFS_WRITE_LOG_RES(mp), resrtextents,
492 			XFS_TRANS_PERM_LOG_RES,
493 			XFS_WRITE_LOG_COUNT);
494 	/*
495 	 * Check for running out of space, note: need lock to return
496 	 */
497 	if (error)
498 		xfs_trans_cancel(tp, 0);
499 	xfs_ilock(ip, XFS_ILOCK_EXCL);
500 	if (error)
501 		goto error_out;
502 
503 	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
504 					      qblocks, 0, quota_flag);
505 	if (error)
506 		goto error1;
507 
508 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
509 	xfs_trans_ihold(tp, ip);
510 
511 	bmapi_flag = XFS_BMAPI_WRITE;
512 	if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))
513 		bmapi_flag |= XFS_BMAPI_PREALLOC;
514 
515 	/*
516 	 * Issue the xfs_bmapi() call to allocate the blocks
517 	 */
518 	XFS_BMAP_INIT(&free_list, &firstfsb);
519 	nimaps = 1;
520 	error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, bmapi_flag,
521 		&firstfsb, 0, &imap, &nimaps, &free_list, NULL);
522 	if (error)
523 		goto error0;
524 
525 	/*
526 	 * Complete the transaction
527 	 */
528 	error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
529 	if (error)
530 		goto error0;
531 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
532 	if (error)
533 		goto error_out;
534 
535 	/*
536 	 * Copy any maps to caller's array and return any error.
537 	 */
538 	if (nimaps == 0) {
539 		error = (ENOSPC);
540 		goto error_out;
541 	}
542 
543 	*ret_imap = imap;
544 	*nmaps = 1;
545 	if ( !(io->io_flags & XFS_IOCORE_RT)  && !ret_imap->br_startblock) {
546                 cmn_err(CE_PANIC,"Access to block zero:  fs <%s> inode: %lld "
547                         "start_block : %llx start_off : %llx blkcnt : %llx "
548                         "extent-state : %x \n",
549                         (ip->i_mount)->m_fsname,
550                         (long long)ip->i_ino,
551                         (unsigned long long)ret_imap->br_startblock,
552 			(unsigned long long)ret_imap->br_startoff,
553                         (unsigned long long)ret_imap->br_blockcount,
554 			ret_imap->br_state);
555         }
556 	return 0;
557 
558 error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
559 	xfs_bmap_cancel(&free_list);
560 	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
561 
562 error1:	/* Just cancel transaction */
563 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
564 	*nmaps = 0;	/* nothing set-up here */
565 
566 error_out:
567 	return XFS_ERROR(error);
568 }
569 
570 /*
571  * If the caller is doing a write at the end of the file,
572  * then extend the allocation out to the file system's write
573  * iosize.  We clean up any extra space left over when the
574  * file is closed in xfs_inactive().
575  *
576  * For sync writes, we are flushing delayed allocate space to
577  * try to make additional space available for allocation near
578  * the filesystem full boundary - preallocation hurts in that
579  * situation, of course.
580  */
581 STATIC int
582 xfs_iomap_eof_want_preallocate(
583 	xfs_mount_t	*mp,
584 	xfs_iocore_t	*io,
585 	xfs_fsize_t	isize,
586 	xfs_off_t	offset,
587 	size_t		count,
588 	int		ioflag,
589 	xfs_bmbt_irec_t *imap,
590 	int		nimaps,
591 	int		*prealloc)
592 {
593 	xfs_fileoff_t   start_fsb;
594 	xfs_filblks_t   count_fsb;
595 	xfs_fsblock_t	firstblock;
596 	int		n, error, imaps;
597 
598 	*prealloc = 0;
599 	if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
600 		return 0;
601 
602 	/*
603 	 * If there are any real blocks past eof, then don't
604 	 * do any speculative allocation.
605 	 */
606 	start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
607 	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
608 	while (count_fsb > 0) {
609 		imaps = nimaps;
610 		firstblock = NULLFSBLOCK;
611 		error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 0,
612 				  &firstblock, 0, imap, &imaps, NULL, NULL);
613 		if (error)
614 			return error;
615 		for (n = 0; n < imaps; n++) {
616 			if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
617 			    (imap[n].br_startblock != DELAYSTARTBLOCK))
618 				return 0;
619 			start_fsb += imap[n].br_blockcount;
620 			count_fsb -= imap[n].br_blockcount;
621 		}
622 	}
623 	*prealloc = 1;
624 	return 0;
625 }
626 
627 int
628 xfs_iomap_write_delay(
629 	xfs_inode_t	*ip,
630 	xfs_off_t	offset,
631 	size_t		count,
632 	int		ioflag,
633 	xfs_bmbt_irec_t *ret_imap,
634 	int		*nmaps)
635 {
636 	xfs_mount_t	*mp = ip->i_mount;
637 	xfs_iocore_t	*io = &ip->i_iocore;
638 	xfs_fileoff_t	offset_fsb;
639 	xfs_fileoff_t	last_fsb;
640 	xfs_off_t	aligned_offset;
641 	xfs_fileoff_t	ioalign;
642 	xfs_fsblock_t	firstblock;
643 	xfs_extlen_t	extsz;
644 	xfs_fsize_t	isize;
645 	int		nimaps;
646 	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
647 	int		prealloc, fsynced = 0;
648 	int		error;
649 
650 	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
651 
652 	/*
653 	 * Make sure that the dquots are there. This doesn't hold
654 	 * the ilock across a disk read.
655 	 */
656 	error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
657 	if (error)
658 		return XFS_ERROR(error);
659 
660 	if (XFS_IS_REALTIME_INODE(ip)) {
661 		if (!(extsz = ip->i_d.di_extsize))
662 			extsz = mp->m_sb.sb_rextsize;
663 	} else {
664 		extsz = ip->i_d.di_extsize;
665 	}
666 
667 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
668 
669 retry:
670 	isize = ip->i_d.di_size;
671 	if (io->io_new_size > isize)
672 		isize = io->io_new_size;
673 
674 	error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
675 				ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
676 	if (error)
677 		return error;
678 
679 	if (prealloc) {
680 		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
681 		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
682 		last_fsb = ioalign + mp->m_writeio_blocks;
683 	} else {
684 		last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
685 	}
686 
687 	if (prealloc || extsz) {
688 		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
689 							&last_fsb);
690 		if (error)
691 			return error;
692 	}
693 
694 	nimaps = XFS_WRITE_IMAPS;
695 	firstblock = NULLFSBLOCK;
696 	error = XFS_BMAPI(mp, NULL, io, offset_fsb,
697 			  (xfs_filblks_t)(last_fsb - offset_fsb),
698 			  XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
699 			  XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
700 			  &nimaps, NULL, NULL);
701 	if (error && (error != ENOSPC))
702 		return XFS_ERROR(error);
703 
704 	/*
705 	 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
706 	 * then we must have run out of space - flush delalloc, and retry..
707 	 */
708 	if (nimaps == 0) {
709 		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
710 					io, offset, count);
711 		if (xfs_flush_space(ip, &fsynced, &ioflag))
712 			return XFS_ERROR(ENOSPC);
713 
714 		error = 0;
715 		goto retry;
716 	}
717 
718 	if (!(io->io_flags & XFS_IOCORE_RT)  && !ret_imap->br_startblock) {
719 		cmn_err(CE_PANIC,"Access to block zero:  fs <%s> inode: %lld "
720                         "start_block : %llx start_off : %llx blkcnt : %llx "
721                         "extent-state : %x \n",
722                         (ip->i_mount)->m_fsname,
723                         (long long)ip->i_ino,
724                         (unsigned long long)ret_imap->br_startblock,
725 			(unsigned long long)ret_imap->br_startoff,
726                         (unsigned long long)ret_imap->br_blockcount,
727 			ret_imap->br_state);
728 	}
729 
730 	*ret_imap = imap[0];
731 	*nmaps = 1;
732 
733 	return 0;
734 }
735 
736 /*
737  * Pass in a delayed allocate extent, convert it to real extents;
738  * return to the caller the extent we create which maps on top of
739  * the originating callers request.
740  *
741  * Called without a lock on the inode.
742  */
743 int
744 xfs_iomap_write_allocate(
745 	xfs_inode_t	*ip,
746 	xfs_off_t	offset,
747 	size_t		count,
748 	xfs_bmbt_irec_t *map,
749 	int		*retmap)
750 {
751 	xfs_mount_t	*mp = ip->i_mount;
752 	xfs_iocore_t    *io = &ip->i_iocore;
753 	xfs_fileoff_t	offset_fsb, last_block;
754 	xfs_fileoff_t	end_fsb, map_start_fsb;
755 	xfs_fsblock_t	first_block;
756 	xfs_bmap_free_t	free_list;
757 	xfs_filblks_t	count_fsb;
758 	xfs_bmbt_irec_t	imap[XFS_STRAT_WRITE_IMAPS];
759 	xfs_trans_t	*tp;
760 	int		i, nimaps, committed;
761 	int		error = 0;
762 	int		nres;
763 
764 	*retmap = 0;
765 
766 	/*
767 	 * Make sure that the dquots are there.
768 	 */
769 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
770 		return XFS_ERROR(error);
771 
772 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
773 	count_fsb = map->br_blockcount;
774 	map_start_fsb = map->br_startoff;
775 
776 	XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
777 
778 	while (count_fsb != 0) {
779 		/*
780 		 * Set up a transaction with which to allocate the
781 		 * backing store for the file.  Do allocations in a
782 		 * loop until we get some space in the range we are
783 		 * interested in.  The other space that might be allocated
784 		 * is in the delayed allocation extent on which we sit
785 		 * but before our buffer starts.
786 		 */
787 
788 		nimaps = 0;
789 		while (nimaps == 0) {
790 			tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
791 			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
792 			error = xfs_trans_reserve(tp, nres,
793 					XFS_WRITE_LOG_RES(mp),
794 					0, XFS_TRANS_PERM_LOG_RES,
795 					XFS_WRITE_LOG_COUNT);
796 			if (error == ENOSPC) {
797 				error = xfs_trans_reserve(tp, 0,
798 						XFS_WRITE_LOG_RES(mp),
799 						0,
800 						XFS_TRANS_PERM_LOG_RES,
801 						XFS_WRITE_LOG_COUNT);
802 			}
803 			if (error) {
804 				xfs_trans_cancel(tp, 0);
805 				return XFS_ERROR(error);
806 			}
807 			xfs_ilock(ip, XFS_ILOCK_EXCL);
808 			xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
809 			xfs_trans_ihold(tp, ip);
810 
811 			XFS_BMAP_INIT(&free_list, &first_block);
812 
813 			nimaps = XFS_STRAT_WRITE_IMAPS;
814 			/*
815 			 * Ensure we don't go beyond eof - it is possible
816 			 * the extents changed since we did the read call,
817 			 * we dropped the ilock in the interim.
818 			 */
819 
820 			end_fsb = XFS_B_TO_FSB(mp, ip->i_d.di_size);
821 			xfs_bmap_last_offset(NULL, ip, &last_block,
822 				XFS_DATA_FORK);
823 			last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
824 			if ((map_start_fsb + count_fsb) > last_block) {
825 				count_fsb = last_block - map_start_fsb;
826 				if (count_fsb == 0) {
827 					error = EAGAIN;
828 					goto trans_cancel;
829 				}
830 			}
831 
832 			/* Go get the actual blocks */
833 			error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb,
834 					XFS_BMAPI_WRITE, &first_block, 1,
835 					imap, &nimaps, &free_list, NULL);
836 			if (error)
837 				goto trans_cancel;
838 
839 			error = xfs_bmap_finish(&tp, &free_list,
840 					first_block, &committed);
841 			if (error)
842 				goto trans_cancel;
843 
844 			error = xfs_trans_commit(tp,
845 					XFS_TRANS_RELEASE_LOG_RES, NULL);
846 			if (error)
847 				goto error0;
848 
849 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
850 		}
851 
852 		/*
853 		 * See if we were able to allocate an extent that
854 		 * covers at least part of the callers request
855 		 */
856 
857 		for (i = 0; i < nimaps; i++) {
858 			if (!(io->io_flags & XFS_IOCORE_RT)  &&
859 			    !imap[i].br_startblock) {
860 				cmn_err(CE_PANIC,"Access to block zero:  "
861 					"fs <%s> inode: %lld "
862 					"start_block : %llx start_off : %llx "
863 					"blkcnt : %llx extent-state : %x \n",
864 					(ip->i_mount)->m_fsname,
865 					(long long)ip->i_ino,
866 					(unsigned long long)
867 						imap[i].br_startblock,
868 					(unsigned long long)
869 						imap[i].br_startoff,
870 					(unsigned long long)
871 				        	imap[i].br_blockcount,
872 					imap[i].br_state);
873                         }
874 			if ((offset_fsb >= imap[i].br_startoff) &&
875 			    (offset_fsb < (imap[i].br_startoff +
876 					   imap[i].br_blockcount))) {
877 				*map = imap[i];
878 				*retmap = 1;
879 				XFS_STATS_INC(xs_xstrat_quick);
880 				return 0;
881 			}
882 			count_fsb -= imap[i].br_blockcount;
883 		}
884 
885 		/* So far we have not mapped the requested part of the
886 		 * file, just surrounding data, try again.
887 		 */
888 		nimaps--;
889 		map_start_fsb = imap[nimaps].br_startoff +
890 				imap[nimaps].br_blockcount;
891 	}
892 
893 trans_cancel:
894 	xfs_bmap_cancel(&free_list);
895 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
896 error0:
897 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
898 	return XFS_ERROR(error);
899 }
900 
901 int
902 xfs_iomap_write_unwritten(
903 	xfs_inode_t	*ip,
904 	xfs_off_t	offset,
905 	size_t		count)
906 {
907 	xfs_mount_t	*mp = ip->i_mount;
908 	xfs_iocore_t    *io = &ip->i_iocore;
909 	xfs_fileoff_t	offset_fsb;
910 	xfs_filblks_t	count_fsb;
911 	xfs_filblks_t	numblks_fsb;
912 	xfs_fsblock_t	firstfsb;
913 	int		nimaps;
914 	xfs_trans_t	*tp;
915 	xfs_bmbt_irec_t imap;
916 	xfs_bmap_free_t free_list;
917 	uint		resblks;
918 	int		committed;
919 	int		error;
920 
921 	xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
922 				&ip->i_iocore, offset, count);
923 
924 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
925 	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
926 	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
927 
928 	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
929 
930 	do {
931 		/*
932 		 * set up a transaction to convert the range of extents
933 		 * from unwritten to real. Do allocations in a loop until
934 		 * we have covered the range passed in.
935 		 */
936 
937 		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
938 		error = xfs_trans_reserve(tp, resblks,
939 				XFS_WRITE_LOG_RES(mp), 0,
940 				XFS_TRANS_PERM_LOG_RES,
941 				XFS_WRITE_LOG_COUNT);
942 		if (error) {
943 			xfs_trans_cancel(tp, 0);
944 			goto error0;
945 		}
946 
947 		xfs_ilock(ip, XFS_ILOCK_EXCL);
948 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
949 		xfs_trans_ihold(tp, ip);
950 
951 		/*
952 		 * Modify the unwritten extent state of the buffer.
953 		 */
954 		XFS_BMAP_INIT(&free_list, &firstfsb);
955 		nimaps = 1;
956 		error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb,
957 				  XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
958 				  1, &imap, &nimaps, &free_list, NULL);
959 		if (error)
960 			goto error_on_bmapi_transaction;
961 
962 		error = xfs_bmap_finish(&(tp), &(free_list),
963 				firstfsb, &committed);
964 		if (error)
965 			goto error_on_bmapi_transaction;
966 
967 		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
968 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
969 		if (error)
970 			goto error0;
971 
972 		if ( !(io->io_flags & XFS_IOCORE_RT)  && !imap.br_startblock) {
973 			cmn_err(CE_PANIC,"Access to block zero:  fs <%s> "
974 				"inode: %lld start_block : %llx start_off : "
975 				"%llx blkcnt : %llx extent-state : %x \n",
976 				(ip->i_mount)->m_fsname,
977 				(long long)ip->i_ino,
978 				(unsigned long long)imap.br_startblock,
979 				(unsigned long long)imap.br_startoff,
980 				(unsigned long long)imap.br_blockcount,
981 				imap.br_state);
982         	}
983 
984 		if ((numblks_fsb = imap.br_blockcount) == 0) {
985 			/*
986 			 * The numblks_fsb value should always get
987 			 * smaller, otherwise the loop is stuck.
988 			 */
989 			ASSERT(imap.br_blockcount);
990 			break;
991 		}
992 		offset_fsb += numblks_fsb;
993 		count_fsb -= numblks_fsb;
994 	} while (count_fsb > 0);
995 
996 	return 0;
997 
998 error_on_bmapi_transaction:
999 	xfs_bmap_cancel(&free_list);
1000 	xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
1001 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1002 error0:
1003 	return XFS_ERROR(error);
1004 }
1005