1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2013 by Delphix. All rights reserved.
28 */
29
30 #include <sys/zfs_context.h>
31 #include <sys/dnode.h>
32 #include <sys/dmu_objset.h>
33 #include <sys/dmu_zfetch.h>
34 #include <sys/dmu.h>
35 #include <sys/dbuf.h>
36 #include <sys/kstat.h>
37
/*
 * Prefetch tunables.  They are kept as writable globals so the prefetcher
 * can be adjusted while its behavior is still being tuned.
 */

/* When non-zero, dmu_zfetch() returns immediately without prefetching. */
int zfs_prefetch_disable = 0;

/* Upper bound on the number of streams kept per zfetch_t. */
uint32_t	zfetch_max_streams = 8;

/* A stream must be idle for more than this many seconds to be reclaimed. */
uint32_t	zfetch_min_sec_reap = 2;

/* Cap, in blocks, on a stream's length and on its prefetch depth. */
uint32_t	zfetch_block_cap = 256;

/*
 * Size in bytes (1MB) of an array read at which prefetching stops.  Not
 * referenced by the code in this file.
 */
uint64_t	zfetch_array_rd_sz = 1024 * 1024;
53
54 /* forward decls for static routines */
55 static boolean_t dmu_zfetch_colinear(zfetch_t *, zstream_t *);
56 static void dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
57 static uint64_t dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
58 static uint64_t dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
59 static boolean_t dmu_zfetch_find(zfetch_t *, zstream_t *, int);
60 static int dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
61 static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *);
62 static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
63 static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
64
65 typedef struct zfetch_stats {
66 kstat_named_t zfetchstat_hits;
67 kstat_named_t zfetchstat_misses;
68 kstat_named_t zfetchstat_colinear_hits;
69 kstat_named_t zfetchstat_colinear_misses;
70 kstat_named_t zfetchstat_stride_hits;
71 kstat_named_t zfetchstat_stride_misses;
72 kstat_named_t zfetchstat_reclaim_successes;
73 kstat_named_t zfetchstat_reclaim_failures;
74 kstat_named_t zfetchstat_stream_resets;
75 kstat_named_t zfetchstat_stream_noresets;
76 kstat_named_t zfetchstat_bogus_streams;
77 } zfetch_stats_t;
78
79 static zfetch_stats_t zfetch_stats = {
80 { "hits", KSTAT_DATA_UINT64 },
81 { "misses", KSTAT_DATA_UINT64 },
82 { "colinear_hits", KSTAT_DATA_UINT64 },
83 { "colinear_misses", KSTAT_DATA_UINT64 },
84 { "stride_hits", KSTAT_DATA_UINT64 },
85 { "stride_misses", KSTAT_DATA_UINT64 },
86 { "reclaim_successes", KSTAT_DATA_UINT64 },
87 { "reclaim_failures", KSTAT_DATA_UINT64 },
88 { "streams_resets", KSTAT_DATA_UINT64 },
89 { "streams_noresets", KSTAT_DATA_UINT64 },
90 { "bogus_streams", KSTAT_DATA_UINT64 },
91 };
92
/*
 * Atomically add "val" to (ZFETCHSTAT_INCR), or add one to (ZFETCHSTAT_BUMP),
 * the zfetch_stats counter named by "stat".
 *
 * The expansions deliberately carry no trailing semicolon: the caller
 * supplies it, so each use is exactly one statement and stays correct as the
 * unbraced body of an if/else.
 */
#define	ZFETCHSTAT_INCR(stat, val) \
	atomic_add_64(&zfetch_stats.stat.value.ui64, (val))

#define	ZFETCHSTAT_BUMP(stat)	ZFETCHSTAT_INCR(stat, 1)
97
/* kstat handle: set by zfetch_init(), deleted and cleared by zfetch_fini(). */
kstat_t *zfetch_ksp;
99
100 /*
101 * Given a zfetch structure and a zstream structure, determine whether the
102 * blocks to be read are part of a co-linear pair of existing prefetch
103 * streams. If a set is found, coalesce the streams, removing one, and
104 * configure the prefetch so it looks for a strided access pattern.
105 *
106 * In other words: if we find two sequential access streams that are
107 * the same length and distance N appart, and this read is N from the
108 * last stream, then we are probably in a strided access pattern. So
109 * combine the two sequential streams into a single strided stream.
110 *
111 * Returns whether co-linear streams were found.
112 */
113 static boolean_t
dmu_zfetch_colinear(zfetch_t * zf,zstream_t * zh)114 dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
115 {
116 zstream_t *z_walk;
117 zstream_t *z_comp;
118
119 if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
120 return (0);
121
122 if (zh == NULL) {
123 rw_exit(&zf->zf_rwlock);
124 return (0);
125 }
126
127 for (z_walk = list_head(&zf->zf_stream); z_walk;
128 z_walk = list_next(&zf->zf_stream, z_walk)) {
129 for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
130 z_comp = list_next(&zf->zf_stream, z_comp)) {
131 int64_t diff;
132
133 if (z_walk->zst_len != z_walk->zst_stride ||
134 z_comp->zst_len != z_comp->zst_stride) {
135 continue;
136 }
137
138 diff = z_comp->zst_offset - z_walk->zst_offset;
139 if (z_comp->zst_offset + diff == zh->zst_offset) {
140 z_walk->zst_offset = zh->zst_offset;
141 z_walk->zst_direction = diff < 0 ? -1 : 1;
142 z_walk->zst_stride =
143 diff * z_walk->zst_direction;
144 z_walk->zst_ph_offset =
145 zh->zst_offset + z_walk->zst_stride;
146 dmu_zfetch_stream_remove(zf, z_comp);
147 mutex_destroy(&z_comp->zst_lock);
148 kmem_free(z_comp, sizeof (zstream_t));
149
150 dmu_zfetch_dofetch(zf, z_walk);
151
152 rw_exit(&zf->zf_rwlock);
153 return (1);
154 }
155
156 diff = z_walk->zst_offset - z_comp->zst_offset;
157 if (z_walk->zst_offset + diff == zh->zst_offset) {
158 z_walk->zst_offset = zh->zst_offset;
159 z_walk->zst_direction = diff < 0 ? -1 : 1;
160 z_walk->zst_stride =
161 diff * z_walk->zst_direction;
162 z_walk->zst_ph_offset =
163 zh->zst_offset + z_walk->zst_stride;
164 dmu_zfetch_stream_remove(zf, z_comp);
165 mutex_destroy(&z_comp->zst_lock);
166 kmem_free(z_comp, sizeof (zstream_t));
167
168 dmu_zfetch_dofetch(zf, z_walk);
169
170 rw_exit(&zf->zf_rwlock);
171 return (1);
172 }
173 }
174 }
175
176 rw_exit(&zf->zf_rwlock);
177 return (0);
178 }
179
180 /*
181 * Given a zstream_t, determine the bounds of the prefetch. Then call the
182 * routine that actually prefetches the individual blocks.
183 */
184 static void
dmu_zfetch_dofetch(zfetch_t * zf,zstream_t * zs)185 dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
186 {
187 uint64_t prefetch_tail;
188 uint64_t prefetch_limit;
189 uint64_t prefetch_ofst;
190 uint64_t prefetch_len;
191 uint64_t blocks_fetched;
192
193 zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len);
194 zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap);
195
196 prefetch_tail = MAX((int64_t)zs->zst_ph_offset,
197 (int64_t)(zs->zst_offset + zs->zst_stride));
198 /*
199 * XXX: use a faster division method?
200 */
201 prefetch_limit = zs->zst_offset + zs->zst_len +
202 (zs->zst_cap * zs->zst_stride) / zs->zst_len;
203
204 while (prefetch_tail < prefetch_limit) {
205 prefetch_ofst = zs->zst_offset + zs->zst_direction *
206 (prefetch_tail - zs->zst_offset);
207
208 prefetch_len = zs->zst_len;
209
210 /*
211 * Don't prefetch beyond the end of the file, if working
212 * backwards.
213 */
214 if ((zs->zst_direction == ZFETCH_BACKWARD) &&
215 (prefetch_ofst > prefetch_tail)) {
216 prefetch_len += prefetch_ofst;
217 prefetch_ofst = 0;
218 }
219
220 /* don't prefetch more than we're supposed to */
221 if (prefetch_len > zs->zst_len)
222 break;
223
224 blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode,
225 prefetch_ofst, zs->zst_len);
226
227 prefetch_tail += zs->zst_stride;
228 /* stop if we've run out of stuff to prefetch */
229 if (blocks_fetched < zs->zst_len)
230 break;
231 }
232 zs->zst_ph_offset = prefetch_tail;
233 zs->zst_last = ddi_get_lbolt();
234 }
235
236 void
zfetch_init(void)237 zfetch_init(void)
238 {
239
240 zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
241 KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
242 KSTAT_FLAG_VIRTUAL);
243
244 if (zfetch_ksp != NULL) {
245 zfetch_ksp->ks_data = &zfetch_stats;
246 kstat_install(zfetch_ksp);
247 }
248 }
249
250 void
zfetch_fini(void)251 zfetch_fini(void)
252 {
253 if (zfetch_ksp != NULL) {
254 kstat_delete(zfetch_ksp);
255 zfetch_ksp = NULL;
256 }
257 }
258
259 /*
260 * This takes a pointer to a zfetch structure and a dnode. It performs the
261 * necessary setup for the zfetch structure, grokking data from the
262 * associated dnode.
263 */
264 void
dmu_zfetch_init(zfetch_t * zf,dnode_t * dno)265 dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
266 {
267 if (zf == NULL) {
268 return;
269 }
270
271 zf->zf_dnode = dno;
272 zf->zf_stream_cnt = 0;
273 zf->zf_alloc_fail = 0;
274
275 list_create(&zf->zf_stream, sizeof (zstream_t),
276 offsetof(zstream_t, zst_node));
277
278 rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
279 }
280
281 /*
282 * This function computes the actual size, in blocks, that can be prefetched,
283 * and fetches it.
284 */
285 static uint64_t
dmu_zfetch_fetch(dnode_t * dn,uint64_t blkid,uint64_t nblks)286 dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks)
287 {
288 uint64_t fetchsz;
289 uint64_t i;
290
291 fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks);
292
293 for (i = 0; i < fetchsz; i++) {
294 dbuf_prefetch(dn, 0, blkid + i, ZIO_PRIORITY_ASYNC_READ,
295 ARC_FLAG_PREFETCH);
296 }
297
298 return (fetchsz);
299 }
300
301 /*
302 * this function returns the number of blocks that would be prefetched, based
303 * upon the supplied dnode, blockid, and nblks. This is used so that we can
304 * update streams in place, and then prefetch with their old value after the
305 * fact. This way, we can delay the prefetch, but subsequent accesses to the
306 * stream won't result in the same data being prefetched multiple times.
307 */
308 static uint64_t
dmu_zfetch_fetchsz(dnode_t * dn,uint64_t blkid,uint64_t nblks)309 dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
310 {
311 uint64_t fetchsz;
312
313 if (blkid > dn->dn_maxblkid) {
314 return (0);
315 }
316
317 /* compute fetch size */
318 if (blkid + nblks + 1 > dn->dn_maxblkid) {
319 fetchsz = (dn->dn_maxblkid - blkid) + 1;
320 ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
321 } else {
322 fetchsz = nblks;
323 }
324
325
326 return (fetchsz);
327 }
328
329 /*
330 * given a zfetch and a zstream structure, see if there is an associated zstream
331 * for this block read. If so, it starts a prefetch for the stream it
332 * located and returns true, otherwise it returns false
333 */
334 static boolean_t
dmu_zfetch_find(zfetch_t * zf,zstream_t * zh,int prefetched)335 dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
336 {
337 zstream_t *zs;
338 int64_t diff;
339 int reset = !prefetched;
340 int rc = 0;
341
342 if (zh == NULL)
343 return (0);
344
345 /*
346 * XXX: This locking strategy is a bit coarse; however, it's impact has
347 * yet to be tested. If this turns out to be an issue, it can be
348 * modified in a number of different ways.
349 */
350
351 rw_enter(&zf->zf_rwlock, RW_READER);
352 top:
353
354 for (zs = list_head(&zf->zf_stream); zs;
355 zs = list_next(&zf->zf_stream, zs)) {
356
357 /*
358 * XXX - should this be an assert?
359 */
360 if (zs->zst_len == 0) {
361 /* bogus stream */
362 ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
363 continue;
364 }
365
366 /*
367 * We hit this case when we are in a strided prefetch stream:
368 * we will read "len" blocks before "striding".
369 */
370 if (zh->zst_offset >= zs->zst_offset &&
371 zh->zst_offset < zs->zst_offset + zs->zst_len) {
372 if (prefetched) {
373 /* already fetched */
374 ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
375 rc = 1;
376 goto out;
377 } else {
378 ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
379 }
380 }
381
382 /*
383 * This is the forward sequential read case: we increment
384 * len by one each time we hit here, so we will enter this
385 * case on every read.
386 */
387 if (zh->zst_offset == zs->zst_offset + zs->zst_len) {
388
389 reset = !prefetched && zs->zst_len > 1;
390
391 mutex_enter(&zs->zst_lock);
392
393 if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
394 mutex_exit(&zs->zst_lock);
395 goto top;
396 }
397 zs->zst_len += zh->zst_len;
398 diff = zs->zst_len - zfetch_block_cap;
399 if (diff > 0) {
400 zs->zst_offset += diff;
401 zs->zst_len = zs->zst_len > diff ?
402 zs->zst_len - diff : 0;
403 }
404 zs->zst_direction = ZFETCH_FORWARD;
405
406 break;
407
408 /*
409 * Same as above, but reading backwards through the file.
410 */
411 } else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
412 /* backwards sequential access */
413
414 reset = !prefetched && zs->zst_len > 1;
415
416 mutex_enter(&zs->zst_lock);
417
418 if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
419 mutex_exit(&zs->zst_lock);
420 goto top;
421 }
422
423 zs->zst_offset = zs->zst_offset > zh->zst_len ?
424 zs->zst_offset - zh->zst_len : 0;
425 zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
426 zs->zst_ph_offset - zh->zst_len : 0;
427 zs->zst_len += zh->zst_len;
428
429 diff = zs->zst_len - zfetch_block_cap;
430 if (diff > 0) {
431 zs->zst_ph_offset = zs->zst_ph_offset > diff ?
432 zs->zst_ph_offset - diff : 0;
433 zs->zst_len = zs->zst_len > diff ?
434 zs->zst_len - diff : zs->zst_len;
435 }
436 zs->zst_direction = ZFETCH_BACKWARD;
437
438 break;
439
440 } else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
441 zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
442 /* strided forward access */
443
444 mutex_enter(&zs->zst_lock);
445
446 if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
447 zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
448 mutex_exit(&zs->zst_lock);
449 goto top;
450 }
451
452 zs->zst_offset += zs->zst_stride;
453 zs->zst_direction = ZFETCH_FORWARD;
454
455 break;
456
457 } else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
458 zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
459 /* strided reverse access */
460
461 mutex_enter(&zs->zst_lock);
462
463 if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
464 zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
465 mutex_exit(&zs->zst_lock);
466 goto top;
467 }
468
469 zs->zst_offset = zs->zst_offset > zs->zst_stride ?
470 zs->zst_offset - zs->zst_stride : 0;
471 zs->zst_ph_offset = (zs->zst_ph_offset >
472 (2 * zs->zst_stride)) ?
473 (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
474 zs->zst_direction = ZFETCH_BACKWARD;
475
476 break;
477 }
478 }
479
480 if (zs) {
481 if (reset) {
482 zstream_t *remove = zs;
483
484 ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
485 rc = 0;
486 mutex_exit(&zs->zst_lock);
487 rw_exit(&zf->zf_rwlock);
488 rw_enter(&zf->zf_rwlock, RW_WRITER);
489 /*
490 * Relocate the stream, in case someone removes
491 * it while we were acquiring the WRITER lock.
492 */
493 for (zs = list_head(&zf->zf_stream); zs;
494 zs = list_next(&zf->zf_stream, zs)) {
495 if (zs == remove) {
496 dmu_zfetch_stream_remove(zf, zs);
497 mutex_destroy(&zs->zst_lock);
498 kmem_free(zs, sizeof (zstream_t));
499 break;
500 }
501 }
502 } else {
503 ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
504 rc = 1;
505 dmu_zfetch_dofetch(zf, zs);
506 mutex_exit(&zs->zst_lock);
507 }
508 }
509 out:
510 rw_exit(&zf->zf_rwlock);
511 return (rc);
512 }
513
514 /*
515 * Clean-up state associated with a zfetch structure. This frees allocated
516 * structure members, empties the zf_stream tree, and generally makes things
517 * nice. This doesn't free the zfetch_t itself, that's left to the caller.
518 */
519 void
dmu_zfetch_rele(zfetch_t * zf)520 dmu_zfetch_rele(zfetch_t *zf)
521 {
522 zstream_t *zs;
523 zstream_t *zs_next;
524
525 ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
526
527 for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
528 zs_next = list_next(&zf->zf_stream, zs);
529
530 list_remove(&zf->zf_stream, zs);
531 mutex_destroy(&zs->zst_lock);
532 kmem_free(zs, sizeof (zstream_t));
533 }
534 list_destroy(&zf->zf_stream);
535 rw_destroy(&zf->zf_rwlock);
536
537 zf->zf_dnode = NULL;
538 }
539
540 /*
541 * Given a zfetch and zstream structure, insert the zstream structure into the
542 * AVL tree contained within the zfetch structure. Peform the appropriate
543 * book-keeping. It is possible that another thread has inserted a stream which
544 * matches one that we are about to insert, so we must be sure to check for this
545 * case. If one is found, return failure, and let the caller cleanup the
546 * duplicates.
547 */
548 static int
dmu_zfetch_stream_insert(zfetch_t * zf,zstream_t * zs)549 dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
550 {
551 zstream_t *zs_walk;
552 zstream_t *zs_next;
553
554 ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
555
556 for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) {
557 zs_next = list_next(&zf->zf_stream, zs_walk);
558
559 if (dmu_zfetch_streams_equal(zs_walk, zs)) {
560 return (0);
561 }
562 }
563
564 list_insert_head(&zf->zf_stream, zs);
565 zf->zf_stream_cnt++;
566 return (1);
567 }
568
569
570 /*
571 * Walk the list of zstreams in the given zfetch, find an old one (by time), and
572 * reclaim it for use by the caller.
573 */
574 static zstream_t *
dmu_zfetch_stream_reclaim(zfetch_t * zf)575 dmu_zfetch_stream_reclaim(zfetch_t *zf)
576 {
577 zstream_t *zs;
578
579 if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
580 return (0);
581
582 for (zs = list_head(&zf->zf_stream); zs;
583 zs = list_next(&zf->zf_stream, zs)) {
584
585 if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap)
586 break;
587 }
588
589 if (zs) {
590 dmu_zfetch_stream_remove(zf, zs);
591 mutex_destroy(&zs->zst_lock);
592 bzero(zs, sizeof (zstream_t));
593 } else {
594 zf->zf_alloc_fail++;
595 }
596 rw_exit(&zf->zf_rwlock);
597
598 return (zs);
599 }
600
601 /*
602 * Given a zfetch and zstream structure, remove the zstream structure from its
603 * container in the zfetch structure. Perform the appropriate book-keeping.
604 */
605 static void
dmu_zfetch_stream_remove(zfetch_t * zf,zstream_t * zs)606 dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
607 {
608 ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
609
610 list_remove(&zf->zf_stream, zs);
611 zf->zf_stream_cnt--;
612 }
613
614 static int
dmu_zfetch_streams_equal(zstream_t * zs1,zstream_t * zs2)615 dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2)
616 {
617 if (zs1->zst_offset != zs2->zst_offset)
618 return (0);
619
620 if (zs1->zst_len != zs2->zst_len)
621 return (0);
622
623 if (zs1->zst_stride != zs2->zst_stride)
624 return (0);
625
626 if (zs1->zst_ph_offset != zs2->zst_ph_offset)
627 return (0);
628
629 if (zs1->zst_cap != zs2->zst_cap)
630 return (0);
631
632 if (zs1->zst_direction != zs2->zst_direction)
633 return (0);
634
635 return (1);
636 }
637
638 /*
639 * This is the prefetch entry point. It calls all of the other dmu_zfetch
640 * routines to create, delete, find, or operate upon prefetch streams.
641 */
642 void
dmu_zfetch(zfetch_t * zf,uint64_t offset,uint64_t size,int prefetched)643 dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
644 {
645 zstream_t zst;
646 zstream_t *newstream;
647 boolean_t fetched;
648 int inserted;
649 unsigned int blkshft;
650 uint64_t blksz;
651
652 if (zfs_prefetch_disable)
653 return;
654
655 /* files that aren't ln2 blocksz are only one block -- nothing to do */
656 if (!zf->zf_dnode->dn_datablkshift)
657 return;
658
659 /* convert offset and size, into blockid and nblocks */
660 blkshft = zf->zf_dnode->dn_datablkshift;
661 blksz = (1 << blkshft);
662
663 bzero(&zst, sizeof (zstream_t));
664 zst.zst_offset = offset >> blkshft;
665 zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
666 P2ALIGN(offset, blksz)) >> blkshft;
667
668 fetched = dmu_zfetch_find(zf, &zst, prefetched);
669 if (fetched) {
670 ZFETCHSTAT_BUMP(zfetchstat_hits);
671 } else {
672 ZFETCHSTAT_BUMP(zfetchstat_misses);
673 fetched = dmu_zfetch_colinear(zf, &zst);
674 if (fetched) {
675 ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
676 } else {
677 ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
678 }
679 }
680
681 if (!fetched) {
682 newstream = dmu_zfetch_stream_reclaim(zf);
683
684 /*
685 * we still couldn't find a stream, drop the lock, and allocate
686 * one if possible. Otherwise, give up and go home.
687 */
688 if (newstream) {
689 ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
690 } else {
691 uint64_t maxblocks;
692 uint32_t max_streams;
693 uint32_t cur_streams;
694
695 ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures);
696 cur_streams = zf->zf_stream_cnt;
697 maxblocks = zf->zf_dnode->dn_maxblkid;
698
699 max_streams = MIN(zfetch_max_streams,
700 (maxblocks / zfetch_block_cap));
701 if (max_streams == 0) {
702 max_streams++;
703 }
704
705 if (cur_streams >= max_streams) {
706 return;
707 }
708 newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
709 }
710
711 newstream->zst_offset = zst.zst_offset;
712 newstream->zst_len = zst.zst_len;
713 newstream->zst_stride = zst.zst_len;
714 newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
715 newstream->zst_cap = zst.zst_len;
716 newstream->zst_direction = ZFETCH_FORWARD;
717 newstream->zst_last = ddi_get_lbolt();
718
719 mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);
720
721 rw_enter(&zf->zf_rwlock, RW_WRITER);
722 inserted = dmu_zfetch_stream_insert(zf, newstream);
723 rw_exit(&zf->zf_rwlock);
724
725 if (!inserted) {
726 mutex_destroy(&newstream->zst_lock);
727 kmem_free(newstream, sizeof (zstream_t));
728 }
729 }
730 }
731