1 // SPDX-License-Identifier: BSD-2-Clause
2 /*
3 * Copyright (c) 2020 iXsystems, Inc.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/dmu.h>
32 #include <sys/dmu_impl.h>
33 #include <sys/dmu_tx.h>
34 #include <sys/dbuf.h>
35 #include <sys/dnode.h>
36 #include <sys/zfs_context.h>
37 #include <sys/dmu_objset.h>
38 #include <sys/dmu_traverse.h>
39 #include <sys/dsl_dataset.h>
40 #include <sys/dsl_dir.h>
41 #include <sys/dsl_pool.h>
42 #include <sys/dsl_synctask.h>
43 #include <sys/dsl_prop.h>
44 #include <sys/zfs_ioctl.h>
45 #include <sys/zap.h>
46 #include <sys/zio_checksum.h>
47 #include <sys/zio_compress.h>
48 #include <sys/sa.h>
49 #include <sys/zfeature.h>
50 #include <sys/abd.h>
51 #include <sys/zfs_rlock.h>
52 #include <sys/racct.h>
53 #include <sys/vm.h>
54 #include <sys/zfs_znode.h>
55 #include <sys/zfs_vnops.h>
56
57 #include <sys/ccompat.h>
58
/*
 * Convert a VM page index into a byte offset.  Provided here as a
 * fallback for environments where the VM headers do not define it.
 */
#ifndef IDX_TO_OFF
#define	IDX_TO_OFF(idx)	(((vm_ooffset_t)(idx)) << PAGE_SHIFT)
#endif

/*
 * Busy flags used when grabbing optional read-behind/read-ahead pages:
 * shared-busy the returned page and ignore an existing shared-busy state.
 * The expansion is parenthesized so the macro stays a single operand when
 * used inside larger expressions (e.g. next to '&' or '?:'), instead of
 * silently re-associating with neighboring operators.
 */
#define	VM_ALLOC_BUSY_FLAGS	(VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)
64
65 int
dmu_write_pages(objset_t * os,uint64_t object,uint64_t offset,uint64_t size,vm_page_t * ma,dmu_tx_t * tx)66 dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
67 vm_page_t *ma, dmu_tx_t *tx)
68 {
69 dmu_buf_t **dbp;
70 struct sf_buf *sf;
71 int numbufs, i;
72 int err;
73 dmu_flags_t flags = 0;
74
75 if (size == 0)
76 return (0);
77
78 err = dmu_buf_hold_array(os, object, offset, size,
79 FALSE, FTAG, &numbufs, &dbp);
80 if (err)
81 return (err);
82
83 for (i = 0; i < numbufs; i++) {
84 int tocpy, copied, thiscpy;
85 int bufoff;
86 dmu_buf_t *db = dbp[i];
87 caddr_t va;
88
89 ASSERT3U(size, >, 0);
90 ASSERT3U(db->db_size, >=, PAGESIZE);
91
92 bufoff = offset - db->db_offset;
93 tocpy = (int)MIN(db->db_size - bufoff, size);
94
95 ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
96
97 if (tocpy == db->db_size) {
98 dmu_buf_will_fill(db, tx, B_FALSE);
99 } else {
100 if (i == numbufs - 1 && bufoff + tocpy < db->db_size) {
101 if (bufoff == 0)
102 flags |= DMU_PARTIAL_FIRST;
103 else
104 flags |= DMU_PARTIAL_MORE;
105 }
106 dmu_buf_will_dirty_flags(db, tx, flags);
107 }
108
109 for (copied = 0; copied < tocpy; copied += PAGESIZE) {
110 ASSERT3U(ptoa((*ma)->pindex), ==,
111 db->db_offset + bufoff);
112 thiscpy = MIN(PAGESIZE, tocpy - copied);
113 va = zfs_map_page(*ma, &sf);
114 ASSERT(db->db_data != NULL);
115 memcpy((char *)db->db_data + bufoff, va, thiscpy);
116 zfs_unmap_page(sf);
117 ma += 1;
118 bufoff += PAGESIZE;
119 }
120
121 if (tocpy == db->db_size)
122 dmu_buf_fill_done(db, tx, B_FALSE);
123
124 offset += tocpy;
125 size -= tocpy;
126 }
127 dmu_buf_rele_array(dbp, numbufs, FTAG);
128 return (err);
129 }
130
/*
 * Read object pages [ma[0] .. ma[count - 1]] into the page array and
 * opportunistically populate up to *rbehind pages before and *rahead
 * pages after the requested run from the same set of held dbufs.  On
 * return, *rbehind and *rahead are updated to the number of optional
 * pages actually filled.  'last_size' is the number of valid bytes in
 * the final requested page.  Returns 0 or an error from
 * dmu_buf_hold_array().
 */
int
dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
    int *rbehind, int *rahead, int last_size)
{
	struct sf_buf *sf;
	vm_object_t vmobj;
	vm_page_t m;
	dmu_buf_t **dbp;
	dmu_buf_t *db;
	caddr_t va;
	int numbufs, i;
	int bufoff, pgoff, tocpy;
	int mi, di;
	int err;

	/* The requested page run must be contiguous. */
	ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
	ASSERT3S(last_size, <=, PAGE_SIZE);

	/* Hold every dbuf overlapping the requested byte range. */
	err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
	    IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
	if (err != 0)
		return (err);

#ifdef ZFS_DEBUG
	IMPLY(last_size < PAGE_SIZE, *rahead == 0);
	/*
	 * Unless this is the object's single (possibly short) first
	 * buffer, all held buffers must be equal-sized power-of-2
	 * blocks aligned to their size.
	 */
	if (dbp[0]->db_offset != 0 || numbufs > 1) {
		for (i = 0; i < numbufs; i++) {
			ASSERT(ISP2(dbp[i]->db_size));
			ASSERT3U((dbp[i]->db_offset % dbp[i]->db_size), ==, 0);
			ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
		}
	}
#endif

	vmobj = ma[0]->object;

	/*
	 * Optional read-behind: walk backwards from the first requested
	 * page, filling pages from dbp[0] for as long as an invalid page
	 * can be grabbed without sleeping.
	 */
	db = dbp[0];
	for (i = 0; i < *rbehind; i++) {
		m = vm_page_grab_unlocked(vmobj, ma[0]->pindex - 1 - i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
		if (m == NULL)
			break;
		if (!vm_page_none_valid(m)) {
			/* Page already cached by someone else; stop here. */
			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_sunbusy(m);
			break;
		}
		ASSERT3U(m->dirty, ==, 0);
		ASSERT(!pmap_page_is_write_mapped(m));

		/* The read-behind page must lie wholly within dbp[0]. */
		ASSERT3U(db->db_size, >, PAGE_SIZE);
		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
		va = zfs_map_page(m, &sf);
		ASSERT(db->db_data != NULL);
		memcpy(va, (char *)db->db_data + bufoff, PAGESIZE);
		zfs_unmap_page(sf);
		vm_page_valid(m);
		/*
		 * Activate the page if another thread is waiting on it;
		 * otherwise this fill was speculative, so deactivate it
		 * to make the page cheap to reclaim.
		 */
		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
			vm_page_activate(m);
		else
			vm_page_deactivate(m);
		vm_page_sunbusy(m);
	}
	*rbehind = i;

	/*
	 * Main copy loop: advance a page cursor (mi/pgoff) and a buffer
	 * cursor (di/bufoff) in lockstep, copying the overlap of the
	 * current page and the current buffer on each iteration.
	 */
	bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
	pgoff = 0;
	for (mi = 0, di = 0; mi < count && di < numbufs; ) {
		if (pgoff == 0) {
			/* Starting a new page: map it unless it's a hole. */
			m = ma[mi];
			if (m != bogus_page) {
				vm_page_assert_xbusied(m);
				ASSERT(vm_page_none_valid(m));
				ASSERT3U(m->dirty, ==, 0);
				ASSERT(!pmap_page_is_write_mapped(m));
				va = zfs_map_page(m, &sf);
			}
		}
		if (bufoff == 0)
			db = dbp[di];

		if (m != bogus_page) {
			ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
			    db->db_offset + bufoff);
		}

		/*
		 * We do not need to clamp the copy size by the file
		 * size as the last block is zero-filled beyond the
		 * end of file anyway.
		 */
		tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
		ASSERT3S(tocpy, >=, 0);
		/* bogus_page marks a page the caller does not want filled. */
		if (m != bogus_page) {
			ASSERT(db->db_data != NULL);
			memcpy(va + pgoff, (char *)db->db_data + bufoff, tocpy);
		}

		pgoff += tocpy;
		ASSERT3S(pgoff, >=, 0);
		ASSERT3S(pgoff, <=, PAGESIZE);
		if (pgoff == PAGESIZE) {
			/* Finished a page: unmap it and move to the next. */
			if (m != bogus_page) {
				zfs_unmap_page(sf);
				vm_page_valid(m);
			}
			ASSERT3S(mi, <, count);
			mi++;
			pgoff = 0;
		}

		bufoff += tocpy;
		ASSERT3S(bufoff, >=, 0);
		ASSERT3S(bufoff, <=, db->db_size);
		if (bufoff == db->db_size) {
			/* Exhausted a buffer: move to the next. */
			ASSERT3S(di, <, numbufs);
			di++;
			bufoff = 0;
		}
	}

#ifdef ZFS_DEBUG
	/*
	 * Three possibilities:
	 * - last requested page ends at a buffer boundary and, thus,
	 *   all pages and buffers have been iterated;
	 * - all requested pages are filled, but the last buffer
	 *   has not been exhausted;
	 *   the read-ahead is possible only in this case;
	 * - all buffers have been read, but the last page has not been
	 *   fully filled;
	 *   this is only possible if the file has only a single buffer
	 *   with a size that is not a multiple of the page size.
	 */
	if (mi == count) {
		ASSERT3S(di, >=, numbufs - 1);
		IMPLY(*rahead != 0, di == numbufs - 1);
		IMPLY(*rahead != 0, bufoff != 0);
		ASSERT0(pgoff);
	}
	if (di == numbufs) {
		ASSERT3S(mi, >=, count - 1);
		ASSERT0(*rahead);
		IMPLY(pgoff == 0, mi == count);
		if (pgoff != 0) {
			ASSERT3S(mi, ==, count - 1);
			ASSERT3U((dbp[0]->db_size & PAGE_MASK), !=, 0);
		}
	}
#endif
	/*
	 * The (short) last buffer only partially covered the last page:
	 * zero the remainder so the whole page can be marked valid.
	 */
	if (pgoff != 0) {
		ASSERT3P(m, !=, bogus_page);
		memset(va + pgoff, 0, PAGESIZE - pgoff);
		zfs_unmap_page(sf);
		vm_page_valid(m);
	}

	/*
	 * Optional read-ahead: walk forwards from the last requested
	 * page, filling pages from the last buffer ('db') by the same
	 * rules as the read-behind loop above.
	 */
	for (i = 0; i < *rahead; i++) {
		m = vm_page_grab_unlocked(vmobj, ma[count - 1]->pindex + 1 + i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_BUSY_FLAGS);
		if (m == NULL)
			break;
		if (!vm_page_none_valid(m)) {
			/* Page already cached by someone else; stop here. */
			ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_sunbusy(m);
			break;
		}
		ASSERT3U(m->dirty, ==, 0);
		ASSERT(!pmap_page_is_write_mapped(m));

		ASSERT3U(db->db_size, >, PAGE_SIZE);
		bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
		tocpy = MIN(db->db_size - bufoff, PAGESIZE);
		va = zfs_map_page(m, &sf);
		ASSERT(db->db_data != NULL);
		memcpy(va, (char *)db->db_data + bufoff, tocpy);
		if (tocpy < PAGESIZE) {
			/* Short tail of the last buffer: zero-fill it. */
			ASSERT3S(i, ==, *rahead - 1);
			ASSERT3U((db->db_size & PAGE_MASK), !=, 0);
			memset(va + tocpy, 0, PAGESIZE - tocpy);
		}
		zfs_unmap_page(sf);
		vm_page_valid(m);
		if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
			vm_page_activate(m);
		else
			vm_page_deactivate(m);
		vm_page_sunbusy(m);
	}
	*rahead = i;

	dmu_buf_rele_array(dbp, numbufs, FTAG);
	return (0);
}
325