// SPDX-License-Identifier: BSD-2-Clause
/*
 * Copyright (c) 2020 iXsystems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_prop.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/sa.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/zfs_rlock.h>
#include <sys/racct.h>
#include <sys/vm.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_vnops.h>

#include <sys/ccompat.h>

#ifndef IDX_TO_OFF
#define	IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

#define	VM_ALLOC_BUSY_FLAGS  VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY

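/*
 * Copy `size' bytes backed by the VM pages `ma' into DMU object `object'
 * starting at byte `offset', as part of transaction `tx'.  The pages are
 * expected to map the same byte range of the object, one page after another.
 */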
int
dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
    vm_page_t *ma, dmu_tx_t *tx)
{
	dmu_buf_t **dbp;
	struct sf_buf *sf;
	int numbufs, i;
	int err;
	dmu_flags_t flags = 0;

	if (size == 0)
		return (0);

	err = dmu_buf_hold_array(os, object, offset, size,
	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
	if (err)
		return (err);

	for (i = 0; i < numbufs; i++) {
		int tocpy, copied, thiscpy;
		int bufoff;
		dmu_buf_t *db = dbp[i];
		caddr_t va;

		ASSERT3U(size, >, 0);
		ASSERT3U(db->db_size, >=, PAGESIZE);

		bufoff = offset - db->db_offset;
		tocpy = (int)MIN(db->db_size - bufoff, size);

		ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);

		if (tocpy == db->db_size) {
			dmu_buf_will_fill(db, tx, B_FALSE);
		} else {
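			/*
			 * If this is the last buffer and the write ends short
			 * of its end, record whether the written region
			 * starts at the beginning of the buffer
			 * (DMU_PARTIAL_FIRST) or partway into it
			 * (DMU_PARTIAL_MORE) before dirtying the buffer.
			 */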
			if (i == numbufs - 1 && bufoff + tocpy < db->db_size) {
				if (bufoff == 0)
					flags |= DMU_PARTIAL_FIRST;
				else
					flags |= DMU_PARTIAL_MORE;
			}
			dmu_buf_will_dirty_flags(db, tx, flags);
		}

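		/*
		 * Copy the data one page at a time, mapping each source page
		 * into the kernel address space with a temporary sf_buf.
		 */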
		for (copied = 0; copied < tocpy; copied += PAGESIZE) {
			ASSERT3U(ptoa((*ma)->pindex), ==,
			    db->db_offset + bufoff);
			thiscpy = MIN(PAGESIZE, tocpy - copied);
			va = zfs_map_page(*ma, &sf);
			ASSERT(db->db_data != NULL);
			memcpy((char *)db->db_data + bufoff, va, thiscpy);
			zfs_unmap_page(sf);
			ma += 1;
			bufoff += PAGESIZE;
		}

		if (tocpy == db->db_size)
			dmu_buf_fill_done(db, tx, B_FALSE);

		offset += tocpy;
		size -= tocpy;
	}
	dmu_buf_rele_array(dbp, numbufs, FTAG);
	return (err);
}

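/*
 * Fill the `count' exclusive-busied VM pages `ma' with data from DMU object
 * `object'; `last_size' is the number of valid bytes in the last page.
 * In addition, up to *rbehind pages before and *rahead pages after the
 * requested range are populated opportunistically.  On return, *rbehind and
 * *rahead are updated to the numbers of extra pages actually filled.
 */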
int
dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
    int *rbehind, int *rahead, int last_size)
{
	struct sf_buf *sf;
	vm_object_t vmobj;
	vm_page_t m;
	dmu_buf_t **dbp;
	dmu_buf_t *db;
	caddr_t va;
	int numbufs, i;
	int bufoff, pgoff, tocpy;
	int mi, di;
	int err;

	ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
	ASSERT3S(last_size, <=, PAGE_SIZE);

	err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
	    IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp,
	    DMU_READ_PREFETCH);
	if (err != 0)
		return (err);

#ifdef ZFS_DEBUG
	IMPLY(last_size < PAGE_SIZE, *rahead == 0);
	if (dbp[0]->db_offset != 0 || numbufs > 1) {
		for (i = 0; i < numbufs; i++) {
			ASSERT(ISP2(dbp[i]->db_size));
			ASSERT0((dbp[i]->db_offset % dbp[i]->db_size));
			ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
		}
	}
#endif

	vmobj = ma[0]->object;

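	/*
	 * Opportunistically fill read-behind pages preceding the requested
	 * range from the first buffer, stopping at the first page that is
	 * already valid or that cannot be grabbed without waiting.
	 */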
	db = dbp[0];
	for (i = 0; i < *rbehind; i++) {
		m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
		if (m == NULL)
			break;
		if (!vm_page_none_valid(m)) {
			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_sunbusy(m);
			break;
		}
		ASSERT0(m->dirty);
		ASSERT(!pmap_page_is_write_mapped(m));

		ASSERT3U(db->db_size, >, PAGE_SIZE);
		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
		va = zfs_map_page(m, &sf);
		ASSERT(db->db_data != NULL);
		memcpy(va, (char *)db->db_data + bufoff, PAGESIZE);
		zfs_unmap_page(sf);
		vm_page_valid(m);
		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
			vm_page_activate(m);
		else
			vm_page_deactivate(m);
		vm_page_sunbusy(m);
	}
	*rbehind = i;

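	/*
	 * Copy the requested pages from the buffers, walking the page array
	 * (mi) and the buffer array (di) in lockstep and copying the overlap
	 * between the current page and the current buffer on each pass.
	 */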
	bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
	pgoff = 0;
	for (mi = 0, di = 0; mi < count && di < numbufs; ) {
		if (pgoff == 0) {
			m = ma[mi];
			if (m != bogus_page) {
				vm_page_assert_xbusied(m);
				ASSERT(vm_page_none_valid(m));
				ASSERT0(m->dirty);
				ASSERT(!pmap_page_is_write_mapped(m));
				va = zfs_map_page(m, &sf);
			}
		}
		if (bufoff == 0)
			db = dbp[di];

		if (m != bogus_page) {
			ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
			    db->db_offset + bufoff);
		}

		/*
		 * We do not need to clamp the copy size by the file
		 * size as the last block is zero-filled beyond the
		 * end of file anyway.
		 */
		tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
		ASSERT3S(tocpy, >=, 0);
		if (m != bogus_page) {
			ASSERT(db->db_data != NULL);
			memcpy(va + pgoff, (char *)db->db_data + bufoff, tocpy);
		}

		pgoff += tocpy;
		ASSERT3S(pgoff, >=, 0);
		ASSERT3S(pgoff, <=, PAGESIZE);
		if (pgoff == PAGESIZE) {
			if (m != bogus_page) {
				zfs_unmap_page(sf);
				vm_page_valid(m);
			}
			ASSERT3S(mi, <, count);
			mi++;
			pgoff = 0;
		}

		bufoff += tocpy;
		ASSERT3S(bufoff, >=, 0);
		ASSERT3S(bufoff, <=, db->db_size);
		if (bufoff == db->db_size) {
			ASSERT3S(di, <, numbufs);
			di++;
			bufoff = 0;
		}
	}

#ifdef ZFS_DEBUG
	/*
	 * Three possibilities:
	 * - the last requested page ends at a buffer boundary and, thus,
	 *   all pages and buffers have been iterated;
	 * - all requested pages are filled, but the last buffer
	 *   has not been exhausted;
	 *   read-ahead is possible only in this case;
	 * - all buffers have been read, but the last page has not been
	 *   fully filled;
	 *   this is only possible if the file has only a single buffer
	 *   with a size that is not a multiple of the page size.
	 */
	if (mi == count) {
		ASSERT3S(di, >=, numbufs - 1);
		IMPLY(*rahead != 0, di == numbufs - 1);
		IMPLY(*rahead != 0, bufoff != 0);
		ASSERT0(pgoff);
	}
	if (di == numbufs) {
		ASSERT3S(mi, >=, count - 1);
		ASSERT0(*rahead);
		IMPLY(pgoff == 0, mi == count);
		if (pgoff != 0) {
			ASSERT3S(mi, ==, count - 1);
			ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0);
		}
	}
#endif
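	/*
	 * If the last requested page was only partially filled by the
	 * buffers, zero the remainder and mark the page valid.
	 */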
	if (pgoff != 0) {
		ASSERT3P(m, !=, bogus_page);
		memset(va + pgoff, 0, PAGESIZE - pgoff);
		zfs_unmap_page(sf);
		vm_page_valid(m);
	}

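	/*
	 * Opportunistically fill read-ahead pages following the requested
	 * range, under the same rules as the read-behind pages; if the
	 * buffer ends short of a full page, the rest of that page is
	 * zero-filled.
	 */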
	for (i = 0; i < *rahead; i++) {
		m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
		if (m == NULL)
			break;
		if (!vm_page_none_valid(m)) {
			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_sunbusy(m);
			break;
		}
		ASSERT0(m->dirty);
		ASSERT(!pmap_page_is_write_mapped(m));

		ASSERT3U(db->db_size, >, PAGE_SIZE);
		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
		tocpy = MIN(db->db_size - bufoff, PAGESIZE);
		va = zfs_map_page(m, &sf);
		ASSERT(db->db_data != NULL);
		memcpy(va, (char *)db->db_data + bufoff, tocpy);
		if (tocpy < PAGESIZE) {
			ASSERT3S(i, ==, *rahead - 1);
			ASSERT3U((db->db_size & PAGE_MASK), !=, 0);
			memset(va + tocpy, 0, PAGESIZE - tocpy);
		}
		zfs_unmap_page(sf);
		vm_page_valid(m);
		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
			vm_page_activate(m);
		else
			vm_page_deactivate(m);
		vm_page_sunbusy(m);
	}
	*rahead = i;

	dmu_buf_rele_array(dbp, numbufs, FTAG);
	return (0);
}