xref: /titanic_52/usr/src/uts/common/fs/zev/zev_checksums.c (revision e206ace3d11652b88b328b3d48fa225ae04be559)
1 #include <sys/zfs_events.h>
2 #include <sys/zev_checksums.h>
3 #include <sys/fs/zev.h>
4 #include <sys/zfs_znode.h>
5 #include <sys/sha1.h>
6 #include <sys/avl.h>
7 #include <sys/sysmacros.h>
8 #include <sys/fs/zev.h>
9 #include <sys/zfs_rlock.h>
10 
/*
 * Cached level-1 checksum block: holds the level-0 SHA1 signatures of
 * every ZEV_L0_SIZE block inside one ZEV_L1_SIZE region of a file.
 * Kept per-file in an AVL tree keyed by offset_l1.
 */
typedef struct zev_sig_cache_chksums_t {
	/* begin of key */
	uint64_t			offset_l1;	/* l1-aligned file offset */
	/* end of key */
	avl_node_t			avl_node;	/* linkage in file->chksums */
	/* one slot per l0 block; all-zero slot == "unknown" sentinel */
	uint8_t		sigs[ZEV_L1_SIZE/ZEV_L0_SIZE][SHA1_DIGEST_LENGTH];
} zev_sig_cache_chksums_t;
18 
/*
 * Per-file cache entry, keyed by (dataset guid, inode, generation).
 * Holds an AVL tree of cached l1 checksum blocks and sits on a
 * doubly-linked LRU list used for cache expiry.
 */
typedef struct zev_sig_cache_file_t {
	/* begin of key */
	uint64_t			guid;	/* dataset guid */
	uint64_t			ino;	/* object (inode) number */
	uint64_t			gen;	/* inode generation */
	/* end of key */
	uint32_t			refcnt;	/* holds; not expired while > 0 */
	struct zev_sig_cache_file_t 	*lru_prev;	/* towards head (hot end) */
	struct zev_sig_cache_file_t 	*lru_next;	/* towards tail (cold end) */
	avl_node_t			avl_node;	/* linkage in zev_sig_cache.files */
	avl_tree_t			chksums;	/* zev_sig_cache_chksums_t by offset_l1 */
} zev_sig_cache_file_t;
31 
/*
 * Global signature cache state.  'mutex' protects every field here as
 * well as the file entries, their checksum trees and the LRU list.
 */
typedef struct zev_sig_cache_t {
	kmutex_t			mutex;
	uint64_t			cache_size;	/* accounted bytes currently cached */
	uint64_t			max_cache_size;	/* expiry threshold; 0 == disabled */
	uint64_t			hits;		/* lookup statistics */
	uint64_t			misses;
	struct zev_sig_cache_file_t	*lru_head;	/* most recently used */
	struct zev_sig_cache_file_t	*lru_tail;	/* least recently used */
	avl_tree_t			files;	/* zev_sig_cache_file_t by (guid,ino,gen) */
} zev_sig_cache_t;
42 
extern offset_t zfs_read_chunk_size;	/* tuneable from zfs_vnops.c */

/*
 * Signature of an all-zero l0 block -- presumably SHA1 over
 * ZEV_L0_SIZE zero bytes (TODO confirm).  zev_get_checksums()
 * compares freshly computed l0 signatures against this to omit
 * empty blocks from the result set.
 */
static uint8_t all_zero_sig[SHA1_DIGEST_LENGTH] = {
	0x1c, 0xea, 0xf7, 0x3d, 0xf4, 0x0e, 0x53, 0x1d, 0xf3, 0xbf,
	0xb2, 0x6b, 0x4f, 0xb7, 0xcd, 0x95, 0xfb, 0x7b, 0xff, 0x1d
};

/*
 * Sentinel meaning "no signature cached in this slot".  Newly
 * zev_zalloc()ed cache entries start out filled with this value.
 */
static uint8_t unknown_sig[SHA1_DIGEST_LENGTH] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/* the single global signature cache instance */
static zev_sig_cache_t	zev_sig_cache;
56 
57 static int
58 zev_cache_file_cmp(const void *entry_a, const void *entry_b)
59 {
60 	const zev_sig_cache_file_t *a = entry_a;
61 	const zev_sig_cache_file_t *b = entry_b;
62 
63 	if (a->guid < b->guid)
64 		return -1;
65 	if (a->guid > b->guid)
66 		return 1;
67 	if (a->ino < b->ino)
68 		return -1;
69 	if (a->ino > b->ino)
70 		return 1;
71 	if (a->gen < b->gen)
72 		return -1;
73 	if (a->gen > b->gen)
74 		return 1;
75 	return 0;
76 }
77 
78 static int
79 zev_chksum_cache_cmp(const void *entry_a, const void *entry_b)
80 {
81 	const zev_sig_cache_chksums_t *a = entry_a;
82 	const zev_sig_cache_chksums_t *b = entry_b;
83 
84 	if (a->offset_l1 < b->offset_l1)
85 		return -1;
86 	if (a->offset_l1 > b->offset_l1)
87 		return 1;
88 	return 0;
89 }
90 
/*
 * Remove a file entry from the cache and free all of its resources:
 * unlink it from the LRU list and the file AVL tree, free every cached
 * l1 checksum entry, and adjust the accounted cache size.
 *
 * must be called with zev_sig_cache.mutex held
 */
static void
zev_chksum_cache_file_free(zev_sig_cache_file_t *file)
{
	zev_sig_cache_chksums_t *cs;
	void *c = NULL; /* cookie for avl_destroy_nodes() */

	/* remove from lru list */
	if (!file->lru_prev) {
		/* we are the head */
		zev_sig_cache.lru_head = file->lru_next;
	} else {
		file->lru_prev->lru_next = file->lru_next;
	}
	if (!file->lru_next) {
		/* we are the tail */
		zev_sig_cache.lru_tail = file->lru_prev;
	} else {
		file->lru_next->lru_prev = file->lru_prev;
	}
	/* free resources */
	avl_remove(&zev_sig_cache.files, file);
	while ((cs = avl_destroy_nodes(&file->chksums, &c)) != NULL) {
		zev_sig_cache.cache_size -= sizeof(*cs);
		zev_free(cs, sizeof(*cs));
	}
	avl_destroy(&file->chksums);
	zev_free(file, sizeof(*file));
	zev_sig_cache.cache_size -= sizeof(*file);
}
119 
120 void
121 zev_chksum_init(void)
122 {
123 	memset(&zev_sig_cache, 0, sizeof(zev_sig_cache));
124 	mutex_init(&zev_sig_cache.mutex, NULL, MUTEX_DRIVER, NULL);
125 	avl_create(&zev_sig_cache.files, zev_cache_file_cmp,
126 	           sizeof(zev_sig_cache_file_t),
127 	           offsetof(zev_sig_cache_file_t, avl_node));
128 	zev_sig_cache.max_cache_size = ZEV_CHKSUM_DEFAULT_CACHE_SIZE;
129 }
130 
131 void
132 zev_chksum_fini(void)
133 {
134 	zev_sig_cache_file_t *file;
135 
136 	mutex_destroy(&zev_sig_cache.mutex);
137 	while ((file = avl_first(&zev_sig_cache.files)) != NULL)
138 		zev_chksum_cache_file_free(file);
139 	avl_destroy(&zev_sig_cache.files);
140 }
141 
/*
 * Look up the cache entry for the file referenced by 'zp' (keyed by
 * dataset guid, inode number and generation), creating a fresh entry
 * at the head of the LRU list if none exists.  The entry is returned
 * with its reference count incremented; the caller must drop the hold
 * with zev_chksum_cache_file_release().
 */
static zev_sig_cache_file_t *
zev_chksum_cache_file_get_and_hold(znode_t *zp)
{
	zev_sig_cache_file_t find_file;
	zev_sig_cache_file_t *file;
	avl_index_t where;

	find_file.guid = zp->z_zfsvfs->z_os->os_dsl_dataset->ds_phys->ds_guid;
	find_file.ino = zp->z_id;
	find_file.gen = zp->z_gen;

	mutex_enter(&zev_sig_cache.mutex);
	file = avl_find(&zev_sig_cache.files, &find_file, &where);
	if (!file) {
		/* not cached yet: create an empty entry for this file */
		file = zev_alloc(sizeof(*file));
		file->guid =
		    zp->z_zfsvfs->z_os->os_dsl_dataset->ds_phys->ds_guid;
		file->ino = zp->z_id;
		file->gen = zp->z_gen;
		file->refcnt = 0;
		avl_create(&file->chksums, zev_chksum_cache_cmp,
		           sizeof(zev_sig_cache_chksums_t),
		           offsetof(zev_sig_cache_chksums_t, avl_node));
		/* insert at the head (hot end) of the lru list */
		file->lru_prev = NULL;
		file->lru_next = zev_sig_cache.lru_head;
		if (zev_sig_cache.lru_head)
			zev_sig_cache.lru_head->lru_prev = file;
		if (!zev_sig_cache.lru_tail)
			zev_sig_cache.lru_tail = file;
		zev_sig_cache.lru_head = file;
		avl_insert(&zev_sig_cache.files, file, where);
		zev_sig_cache.cache_size += sizeof(*file);
	}
	file->refcnt++;
	mutex_exit(&zev_sig_cache.mutex);
	return file;
}
179 
180 static void
181 zev_chksum_cache_file_release(zev_sig_cache_file_t *file)
182 {
183 	mutex_enter(&zev_sig_cache.mutex);
184 
185 	/* We don't invalidate/free/destroy *file. Cache expiry does that */
186 	file->refcnt--;
187 
188 	/* Move file to front of lru list */
189 	if (file->lru_prev) {
190 		/* am not already the head -> move me to front. */
191 		file->lru_prev->lru_next = file->lru_next;
192 		if (file->lru_next)
193 			file->lru_next->lru_prev = file->lru_prev;
194 		zev_sig_cache.lru_head->lru_prev = file;
195 		file->lru_next = zev_sig_cache.lru_head;
196 		file->lru_prev = NULL;
197 		zev_sig_cache.lru_head = file;
198 	}
199 
200 	mutex_exit(&zev_sig_cache.mutex);
201 }
202 
203 static  zev_sig_cache_chksums_t *
204 zev_chksum_cache_get_lv1_entry(zev_sig_cache_file_t *file, uint64_t off_l1)
205 {
206 	zev_sig_cache_chksums_t find_chksum;
207 	zev_sig_cache_chksums_t *cs;
208 	avl_index_t where;
209 
210 	find_chksum.offset_l1 = off_l1;
211 	cs = avl_find(&file->chksums, &find_chksum, &where);
212 	if (!cs) {
213 		cs = zev_zalloc(sizeof(*cs));
214 		cs->offset_l1 = off_l1;
215 		avl_insert(&file->chksums, cs, where);
216 		zev_sig_cache.cache_size += sizeof(*cs);
217 	}
218 	return cs;
219 }
220 
221 void
222 zev_chksum_stats(uint64_t *c_size, uint64_t *c_hits, uint64_t *c_misses)
223 {
224 	mutex_enter(&zev_sig_cache.mutex);
225 	*c_size = zev_sig_cache.cache_size;
226 	*c_hits = zev_sig_cache.hits;
227 	*c_misses = zev_sig_cache.misses;
228 	mutex_exit(&zev_sig_cache.mutex);
229 }
230 
231 static void
232 zev_chksum_cache_invalidate(zev_sig_cache_file_t *file,
233                             znode_t *zp,
234                             zev_chksum_mode_t mode,
235                             uint64_t off,
236                             uint64_t len)
237 {
238 	zev_sig_cache_chksums_t find_chksum;
239 	zev_sig_cache_chksums_t *cs;
240 	int idx;
241 	uint64_t off_l1;
242 	uint64_t len_l1;
243 	uint64_t pos_l0;
244 	uint64_t pos_l1;
245 
246 	mutex_enter(&zev_sig_cache.mutex);
247 
248 	/* start of this megabyte */
249 	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
250 
251 	if (len == 0) {
252 		/* truncate() to EOF */
253 		len_l1 = ZEV_L1_SIZE;
254 	} else {
255 		/* full megabytes */
256 		len_l1 = len + (off - off_l1);
257 		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
258 	}
259 
260 	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {
261 
262 		find_chksum.offset_l1 = pos_l1;
263 		cs = avl_find(&file->chksums, &find_chksum, NULL);
264 		if (!cs)
265 			continue;
266 
267 		for (pos_l0 = MAX(pos_l1, P2ALIGN(off, ZEV_L0_SIZE));
268 		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
269 		     pos_l0 += ZEV_L0_SIZE){
270 
271 			if ((len > 0) && (pos_l0 >= (off + len - 1)))
272 				break;
273 
274 			idx = (pos_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
275 			memcpy(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH);
276 		}
277 	}
278 
279 	if (len == 0) {
280 		/* truncate() to EOF -> invalidate all l1 sigs beyond EOF */
281 		while ((cs = avl_last(&file->chksums)) != NULL) {
282 			if (cs->offset_l1 < zp->z_size)
283 				break;
284 			avl_remove(&file->chksums, cs);
285 			zev_sig_cache.cache_size -= sizeof(*cs);
286 			zev_free(cs, sizeof(*cs));
287 		}
288 	}
289 
290 	mutex_exit(&zev_sig_cache.mutex);
291 }
292 
293 static int
294 zev_chksum_cache_get(uint8_t *dst,
295                      zev_sig_cache_file_t *file,
296                      zev_sig_cache_chksums_t *cs,
297                      uint64_t off_l0)
298 {
299 	int idx;
300 
301 	mutex_enter(&zev_sig_cache.mutex);
302 
303 	idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
304 	if (!memcmp(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH)) {
305 		zev_sig_cache.misses++;
306 		mutex_exit(&zev_sig_cache.mutex);
307 		return ENOENT;
308 	}
309 	memcpy(dst, cs->sigs[idx], SHA1_DIGEST_LENGTH);
310 	zev_sig_cache.hits++;
311 
312 	mutex_exit(&zev_sig_cache.mutex);
313 	return 0;
314 }
315 
316 static void
317 zev_chksum_cache_put(uint8_t *sig,
318                      zev_sig_cache_file_t *file,
319                      zev_sig_cache_chksums_t *cs,
320                      uint64_t off_l0)
321 {
322 	zev_sig_cache_file_t *f;
323 	zev_sig_cache_file_t *tmp;
324 	int idx;
325 
326 	mutex_enter(&zev_sig_cache.mutex);
327 
328 	if (zev_sig_cache.max_cache_size == 0) {
329 		/* cache disabled */
330 		mutex_exit(&zev_sig_cache.mutex);
331 		return;
332 	}
333 
334 	/* expire entries until there's room in the cache */
335 	f = zev_sig_cache.lru_tail;
336 	while (f && (zev_sig_cache.cache_size > zev_sig_cache.max_cache_size)){
337 		tmp = f;
338 		f = f->lru_prev;
339 		if (tmp->refcnt == 0)
340 			zev_chksum_cache_file_free(tmp);
341 	}
342 
343 	idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
344 	memcpy(cs->sigs[idx], sig, SHA1_DIGEST_LENGTH);
345 
346 	mutex_exit(&zev_sig_cache.mutex);
347 	return;
348 }
349 
/* verbatim from zfs_vnops.c (unfortunately it's declared static, there) */
/*
 * Read 'nbytes' via the page cache where pages are resident, falling
 * back to dmu_read_uio() for uncached ranges.  Kept in sync with the
 * zfs_vnops.c original on purpose -- do not "improve" independently.
 */
static int
mappedread(vnode_t *vp, int nbytes, uio_t *uio)
{
	znode_t *zp = VTOZ(vp);
	objset_t *os = zp->z_zfsvfs->z_os;
	int64_t	start, off;
	int len = nbytes;
	int error = 0;

	start = uio->uio_loffset;
	off = start & PAGEOFFSET;	/* offset within the first page */
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		page_t *pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		if (pp = page_lookup(vp, start, SE_SHARED)) {
			caddr_t va;

			/* page is cached: copy out of the mapped page */
			va = zfs_map_page(pp, S_READ);
			error = uiomove(va + off, bytes, UIO_READ, uio);
			zfs_unmap_page(pp, va);
			page_unlock(pp);
		} else {
			/* not in the page cache: read through the DMU */
			error = dmu_read_uio(os, zp->z_id, uio, bytes);
		}
		len -= bytes;
		off = 0;	/* only the first page can start mid-page */
		if (error)
			break;
	}
	return (error);
}
383 
384 static int
385 zev_safe_read(znode_t *zp, char *buf, uint64_t off, uint64_t len)
386 {
387 	uio_t		uio;
388 	struct iovec	iov;
389 	ssize_t		n;
390 	ssize_t		nbytes;
391 	int		error = 0;
392 	vnode_t		*vp = ZTOV(zp);
393 	objset_t	*os = zp->z_zfsvfs->z_os;
394 
395 	/* set up uio */
396 
397 	iov.iov_base = buf;
398 	iov.iov_len = ZEV_L0_SIZE;
399 
400 	uio.uio_iov = &iov;
401 	uio.uio_iovcnt = 1;
402 	uio.uio_segflg = (short)UIO_SYSSPACE;
403 	uio.uio_llimit = RLIM64_INFINITY;
404 	uio.uio_fmode = FREAD;
405 	uio.uio_extflg = UIO_COPY_DEFAULT;
406 
407 	uio.uio_loffset = off;
408 	uio.uio_resid = len;
409 
410 again:
411 	if (uio.uio_loffset >= zp->z_size)
412 		return EINVAL;
413 
414 	/* don't read past EOF */
415 	n = MIN(uio.uio_resid, zp->z_size - uio.uio_loffset);
416 
417 	/* this block was essentially copied from zfs_read() in zfs_vnops.c */
418 	while (n > 0) {
419 		nbytes = MIN(n, zfs_read_chunk_size -
420 		    P2PHASE(uio.uio_loffset, zfs_read_chunk_size));
421 
422 		if (vn_has_cached_data(vp)) {
423 			error = mappedread(vp, nbytes, &uio);
424 		} else {
425 			error = dmu_read_uio(os, zp->z_id, &uio, nbytes);
426 		}
427 		if (error) {
428 			if (error = EINTR)
429 				goto again;
430 			/* convert checksum errors into IO errors */
431 			if (error == ECKSUM)
432 				error = SET_ERROR(EIO);
433 			break;
434 		}
435 
436 		n -= nbytes;
437 	}
438 
439 	if (error)
440 		return error;
441 	return len - uio.uio_resid;
442 }
443 
444 static void
445 zev_l0_sig(uint8_t *sig, char *buf)
446 {
447 	SHA1_CTX	ctx;
448 
449 	SHA1Init(&ctx);
450 	SHA1Update(&ctx, buf, ZEV_L0_SIZE);
451 	SHA1Final(sig, &ctx);
452 	return;
453 }
454 
455 static void
456 zev_l0_blocksig(uint8_t *blk_sig, uint8_t *l0_sig, uint8_t block_no)
457 {
458 	SHA1_CTX	ctx;
459 
460 	SHA1Init(&ctx);
461 	SHA1Update(&ctx, l0_sig, SHA1_DIGEST_LENGTH);
462 	SHA1Update(&ctx, &block_no, sizeof(block_no));
463 	SHA1Final(blk_sig, &ctx);
464 	return;
465 }
466 
/*
 * Accumulate an l0 block signature into an l1 signature: treat both
 * as 160-bit big-endian integers and add them, dropping the final
 * carry (i.e. addition modulo 2^160).
 */
static void
zev_l1_add(uint8_t *sig_l1, uint8_t *sig_l0)
{
	int	pos;
	int	carry = 0;

	for (pos = SHA1_DIGEST_LENGTH - 1; pos >= 0; pos--) {
		int sum = sig_l1[pos] + sig_l0[pos] + carry;

		sig_l1[pos] = (uint8_t)(sum & 0xff);
		carry = (sum >> 8) & 1;
	}
}
480 
481 static int
482 zev_get_result_buffer(zev_sig_t **buffer,
483                       uint64_t *buffer_len,
484                       uint64_t max_buffer_len,
485                       znode_t *zp,
486                       uint64_t off,
487                       uint64_t len,
488                       zev_chksum_mode_t mode)
489 {
490 	uint64_t	blk_start;
491 	uint64_t	blk_end;
492 	uint64_t	l0_blocks;
493 	uint64_t	l1_blocks;
494 	uint64_t	sigs;
495 	int buflen;
496 
497 	/* calculate result set size: how many checksums will we provide? */
498 
499 	ASSERT(len > 0 || (mode == zev_truncate && len == 0));
500 
501 	if (len == 0) {
502 		/* truncate */
503 		l0_blocks = ((off % ZEV_L0_SIZE) == 0) ? 0 : 1;
504 		l1_blocks = ((off % ZEV_L1_SIZE) == 0) ? 0 : 1;
505 	} else {
506 		/* how many lv1 checksums do we update? */
507 		blk_start = off / ZEV_L1_SIZE;
508 		blk_end = (off + len - 1) / ZEV_L1_SIZE;
509 		l1_blocks = blk_end - blk_start + 1;
510 		/* how many lv0 checksums do we update? */
511 		blk_start = off / ZEV_L0_SIZE;
512 		blk_end = (off + len - 1) / ZEV_L0_SIZE;
513 		l0_blocks = blk_end - blk_start + 1;
514 	}
515 
516 	sigs = l1_blocks + l0_blocks;
517 	if (sigs == 0) {
518 		*buffer = NULL;
519 		*buffer_len = 0;
520 		return 0;
521 	}
522 
523 	buflen = sigs * sizeof(zev_sig_t);
524 	if (max_buffer_len && (buflen > max_buffer_len)) {
525 		*buffer = NULL;
526 		*buffer_len = 0;
527 		return ENOSPC;
528 	}
529 	*buffer_len = buflen;
530 	*buffer = zev_alloc(buflen);
531 	return 0;
532 }
533 
534 static void
535 zev_append_sig(zev_sig_t *s, int level, uint64_t off, uint8_t *sig)
536 {
537 	s->level = level;
538 	s->block_offset = off;
539 	memcpy(s->value, sig, SHA1_DIGEST_LENGTH);
540 }
541 
/*
 * Calculate all l0 and l1 checksums that are affected by the given range.
 *
 * On success, *result points to a zev_alloc()ed array of *signature_cnt
 * zev_sig_t records (the caller frees it via zev_free(*result,
 * *result_buf_len)); returns non-zero on failure.
 *
 * This function assumes that the ranges it needs to read are already
 * range-locked.
 */
int
zev_get_checksums(zev_sig_t **result,
                  uint64_t *result_buf_len,
                  uint64_t *signature_cnt,
                  uint64_t max_result_len,
                  znode_t *zp,
                  uint64_t off,
                  uint64_t len,
                  zev_chksum_mode_t mode)
{
	uint64_t	off_l1;		/* l1-aligned start of the range */
	uint64_t	len_l1;		/* range length rounded to whole l1 blocks */
	uint64_t	pos_l1;
	uint64_t	pos_l0;
	char		*buf;		/* scratch buffer, one l0 block */
	int64_t		ret;
	uint8_t		sig_l0[SHA1_DIGEST_LENGTH];
	uint8_t		blk_sig_l0[SHA1_DIGEST_LENGTH];
	uint8_t		sig_l1[SHA1_DIGEST_LENGTH];
	uint8_t		l0_block_no;	/* l0 index within the current l1 block */
	zev_sig_t	*sig;		/* write cursor into the result buffer */
	int		non_empty_l0_blocks;
	zev_sig_cache_file_t *file;
	zev_sig_cache_chksums_t *cs;

	/*
	 * Note: for write events, the callback is called via
	 *    zfs_write() -> zfs_log_write() -> zev_znode_write_cb()
	 *
	 * The transaction is not committed, yet.
	 *
	 * A write() syscall might be split into smaller chunks by zfs_write()
	 *
	 * zfs_write() has a range lock when this is called. (zfs_vnops.c:925)
	 * In zev mode, the range lock will encompass all data we need
	 * to calculate our checksums.
	 *
	 * The same is true for truncates with non-zero length. ("punch hole")
	 */

	ASSERT(len > 0 || (mode == zev_truncate && len == 0));
	*signature_cnt = 0;

	/* start of this megabyte */
	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
	/* full megabytes */
	if (len == 0) {
		/* truncate(): we'll look at the last lv1 block, only. */
		len_l1 = ZEV_L1_SIZE;
	} else {
		len_l1 = len + (off - off_l1);
		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
	}

	/* hold the per-file cache entry and drop now-stale cached sigs */
	file = zev_chksum_cache_file_get_and_hold(zp);
	zev_chksum_cache_invalidate(file, zp, mode, off, len);
	buf = zev_alloc(ZEV_L0_SIZE);

	ret = zev_get_result_buffer(result, result_buf_len, max_result_len,
	                            zp, off, len, mode);
	if (ret) {
		zev_free(buf, ZEV_L0_SIZE);
		zev_chksum_cache_file_release(file);
		return ret;
	}
	if (*result == NULL) {
		/* we're done */
		zev_free(buf, ZEV_L0_SIZE);
		zev_chksum_cache_file_release(file);
		return 0;
	}
	sig = *result;

	/* walk the affected range one l1 ("megabyte") block at a time */
	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {

		if (pos_l1 > zp->z_size) {
			cmn_err(CE_WARN, "zev_get_checksums: off+len beyond "
			        "EOF.  Unexpected behaviour; please fix!");
			break;
		}

		/*
		 * Since we have a reference to 'file' 'cs' can't be expired.
		 * Since our ranges are range locked, other threads won't
		 * touch our checksum entries. (not even read them)
		 * Hence, we don't need to hold() or release() 'cs'.
		 */
		cs = zev_chksum_cache_get_lv1_entry(file, pos_l1);

		l0_block_no = 0;
		non_empty_l0_blocks = 0;
		bzero(sig_l1, sizeof(sig_l1));
		/* accumulate the l0 sigs of this l1 block into sig_l1 */
		for (pos_l0 = pos_l1;
		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
		     pos_l0 += ZEV_L0_SIZE){

			if (pos_l0 >= zp->z_size)
				break;	/* EOF */

			if (zev_chksum_cache_get(sig_l0, file,cs,pos_l0) != 0) {

				/*
				 * signature is not cached, yet.
				 * NOTE(review): the "< 0" error check below
				 * relies on zev_safe_read() returning
				 * negative values on failure -- verify the
				 * error sign convention.
				 */
				ret = zev_safe_read(zp, buf,
				                    pos_l0, ZEV_L0_SIZE);
				if (ret < 0) {
					zev_free(*result, *result_buf_len);
					zev_free(buf, ZEV_L0_SIZE);
					zev_chksum_cache_file_release(file);
					return ret;
				}
				/* pad buffer with zeros if necessary */
				if (ret < ZEV_L0_SIZE)
					bzero(buf + ret, ZEV_L0_SIZE - ret);

				/* calculate signature */
				zev_l0_sig(sig_l0, buf);

				zev_chksum_cache_put(sig_l0, file, cs, pos_l0);
			}

			if (!memcmp(sig_l0, all_zero_sig, SHA1_DIGEST_LENGTH)) {
				/* all-zero l0 block.  omit signature. */
				l0_block_no++;
				continue;
			}
			non_empty_l0_blocks++;
			zev_l0_blocksig(blk_sig_l0, sig_l0, l0_block_no);
			zev_l1_add(sig_l1, blk_sig_l0);

			/*
			 * report the l0 sig if the block overlaps the
			 * modified range.
			 * NOTE(review): "off + len - 1" wraps around in
			 * unsigned math for len == 0 (truncate) and skips
			 * the final block when off+len-1 is l0-aligned --
			 * looks like an off-by-one; confirm intent.
			 */
			if (((pos_l0 + ZEV_L0_SIZE - 1) >= off) &&
			    (pos_l0 < (off + len - 1))) {
				zev_append_sig(sig++, 0, pos_l0, sig_l0);
			}

			l0_block_no++;
		}

		/* l1 sig only for files spanning more than one l0 block */
		if (non_empty_l0_blocks && (zp->z_size > ZEV_L0_SIZE))
			zev_append_sig(sig++, 1, pos_l1, sig_l1);
	}

	/* number of signature records actually written to the buffer */
	*signature_cnt = ((char *)sig - (char *)*result) / sizeof(zev_sig_t);

	zev_free(buf, ZEV_L0_SIZE);
	zev_chksum_cache_file_release(file);
	return 0;
}
695 
696 int
697 zev_ioc_get_signatures(intptr_t arg, int mode)
698 {
699 	zev_ioctl_get_signatures_t gs;
700 	file_t *fp;
701 	int ret = 0;
702 	znode_t *zp;
703 	zev_sig_t *sig_buf = NULL;
704 	uint64_t sig_buf_len;
705 	uint64_t sig_cnt = 0;
706 	uint64_t sig_len;
707 	char *dst;
708 	int range_locked = 0;
709 	rl_t *rl;
710 	ssize_t	lock_off;
711 	ssize_t lock_len;
712 
713 	if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
714 		return EFAULT;
715 	fp = getf(gs.zev_fd);
716 	if (fp == NULL)
717 		return EBADF;
718 	if (fp->f_vnode->v_vfsp->vfs_fstype != zfsfstype) {
719 		ret = EINVAL;
720 		goto out;
721 	}
722 	if (fp->f_vnode->v_type != VREG) {
723 		ret = EINVAL;
724 		goto out;
725 	}
726 	zp = VTOZ(fp->f_vnode);
727 	if (gs.zev_offset >= zp->z_size) {
728 		ret = EINVAL;
729 		goto out;
730 	}
731 
732 	/* range lock data */
733 	lock_off = P2ALIGN(gs.zev_offset, ZEV_L1_SIZE);
734 	lock_len = gs.zev_len + (gs.zev_offset - lock_off);
735 	lock_len = P2ROUNDUP(lock_len, ZEV_L1_SIZE);
736 	rl = zfs_range_lock(zp, lock_off, lock_len, RL_READER);
737 	range_locked = 1;
738 
739 	/* get checksums */
740 	ret = zev_get_checksums(&sig_buf, &sig_buf_len, &sig_cnt,
741 	                        gs.zev_bufsize,
742 	                        zp, gs.zev_offset, gs.zev_len, zev_write);
743 	if (ret)
744 		goto out;
745 
746 	/* copy to userland */
747 	sig_len = sig_cnt * sizeof(zev_sig_t);
748 	gs.zev_signature_cnt = sig_cnt;
749 	if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) {
750 		ret = EFAULT;
751 		goto out;
752 	}
753 	if (sig_cnt && sig_buf) {
754 		dst = (char *)arg + sizeof(gs);
755 		if (ddi_copyout(sig_buf, (void *)dst, sig_len, mode) != 0) {
756 			ret = EFAULT;
757 			goto out;
758 		}
759 	}
760 out:
761 	if (sig_buf)
762 		zev_free(sig_buf, sig_buf_len);
763 	if (range_locked)
764 		zfs_range_unlock(rl);
765 	releasef(gs.zev_fd);
766 	return ret;
767 }
768 
769