xref: /titanic_41/usr/src/uts/common/fs/zev/zev_checksums.c (revision 721df37ded2cce623b0249cc35a872c3ce291b9b)
1 #include <sys/zfs_events.h>
2 #include <sys/zev_checksums.h>
3 #include <sys/fs/zev.h>
4 #include <sys/zfs_znode.h>
5 #include <sys/sha1.h>
6 #include <sys/avl.h>
7 #include <sys/sysmacros.h>
8 #include <sys/fs/zev.h>
9 #include <sys/zfs_rlock.h>
10 #include <sys/list.h>
11 
/*
 * Cached lv0 signatures for one lv1-sized region of a file.
 * Keyed by the lv1-aligned file offset; lives in a per-file AVL tree
 * (zev_sig_cache_file_t.chksums).
 */
typedef struct zev_sig_cache_chksums_t {
	/* begin of key */
	uint64_t			offset_l1;	/* lv1-aligned offset */
	/* end of key */
	avl_node_t			avl_node;	/* file->chksums linkage */
	/* one SHA-1 per lv0 block; an all-zero entry (== unknown_sig)
	 * means "signature not cached yet" */
	uint8_t		sigs[ZEV_L1_SIZE/ZEV_L0_SIZE][SHA1_DIGEST_LENGTH];
} zev_sig_cache_chksums_t;
19 
/*
 * Per-file checksum cache entry, identified by the (dataset guid,
 * inode, generation) triple.  Holds a tree of lv1 checksum nodes and
 * sits on the global LRU list used for cache expiry.
 */
typedef struct zev_sig_cache_file_t {
	/* begin of key */
	uint64_t			guid;	/* dataset guid */
	uint64_t			ino;	/* znode id */
	uint64_t			gen;	/* znode generation */
	/* end of key */
	uint32_t			refcnt;	/* holds; expiry skips refcnt > 0 */
	list_node_t			lru_node;	/* zev_sig_cache.lru linkage */
	avl_node_t			avl_node;	/* zev_sig_cache.files linkage */
	avl_tree_t			chksums;	/* zev_sig_cache_chksums_t nodes */
} zev_sig_cache_file_t;
31 
/*
 * Global checksum cache state: all cached files, an LRU list for
 * expiry, a size budget and hit/miss statistics.  'mutex' guards every
 * field here as well as the cached entries themselves.
 */
typedef struct zev_sig_cache_t {
	kmutex_t			mutex;
	uint64_t			cache_size;	/* bytes currently accounted */
	uint64_t			max_cache_size;	/* budget; 0 = caching disabled */
	uint64_t			hits;
	uint64_t			misses;
	list_t				lru;	/* head = most recently used */
	avl_tree_t			files;	/* zev_sig_cache_file_t nodes */
} zev_sig_cache_t;
41 
extern offset_t zfs_read_chunk_size;	/* tuneable from zfs_vnops.c */

/*
 * Signature of an all-zero lv0 block; used in zev_get_checksums() to
 * omit empty blocks from the result set.  (Presumably the SHA-1 of
 * ZEV_L0_SIZE zero bytes -- TODO confirm against current ZEV_L0_SIZE.)
 */
static uint8_t all_zero_sig[SHA1_DIGEST_LENGTH] = {
	0x1c, 0xea, 0xf7, 0x3d, 0xf4, 0x0e, 0x53, 0x1d, 0xf3, 0xbf,
	0xb2, 0x6b, 0x4f, 0xb7, 0xcd, 0x95, 0xfb, 0x7b, 0xff, 0x1d
};

/* marker for "signature not cached yet"; matches zero-filled cache slots */
static uint8_t unknown_sig[SHA1_DIGEST_LENGTH] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/* the single global checksum cache instance */
static zev_sig_cache_t	zev_sig_cache;
55 
56 static int
57 zev_cache_file_cmp(const void *entry_a, const void *entry_b)
58 {
59 	const zev_sig_cache_file_t *a = entry_a;
60 	const zev_sig_cache_file_t *b = entry_b;
61 
62 	if (a->guid < b->guid)
63 		return -1;
64 	if (a->guid > b->guid)
65 		return 1;
66 	if (a->ino < b->ino)
67 		return -1;
68 	if (a->ino > b->ino)
69 		return 1;
70 	if (a->gen < b->gen)
71 		return -1;
72 	if (a->gen > b->gen)
73 		return 1;
74 	return 0;
75 }
76 
77 static int
78 zev_chksum_cache_cmp(const void *entry_a, const void *entry_b)
79 {
80 	const zev_sig_cache_chksums_t *a = entry_a;
81 	const zev_sig_cache_chksums_t *b = entry_b;
82 
83 	if (a->offset_l1 < b->offset_l1)
84 		return -1;
85 	if (a->offset_l1 > b->offset_l1)
86 		return 1;
87 	return 0;
88 }
89 
/* must be called with zev_sig_cache.mutex held */
static void
zev_chksum_cache_file_free(zev_sig_cache_file_t *file)
{
	zev_sig_cache_chksums_t *cs;
	void *c = NULL; /* cookie */

	/* remove from lru list */
	list_remove(&zev_sig_cache.lru, file);
	/* free resources */
	avl_remove(&zev_sig_cache.files, file);
	/* drop every cached lv1 node, keeping the size accounting in sync */
	while ((cs = avl_destroy_nodes(&file->chksums, &c)) != NULL) {
		zev_sig_cache.cache_size -= sizeof(*cs);
		zev_free(cs, sizeof(*cs));
	}
	avl_destroy(&file->chksums);
	zev_free(file, sizeof(*file));
	/* sizeof(*file) is a compile-time constant, so this is safe
	 * even though 'file' was just freed */
	zev_sig_cache.cache_size -= sizeof(*file);
}
109 
110 void
111 zev_chksum_init(void)
112 {
113 	memset(&zev_sig_cache, 0, sizeof(zev_sig_cache));
114 	mutex_init(&zev_sig_cache.mutex, NULL, MUTEX_DRIVER, NULL);
115 	avl_create(&zev_sig_cache.files, zev_cache_file_cmp,
116 	           sizeof(zev_sig_cache_file_t),
117 	           offsetof(zev_sig_cache_file_t, avl_node));
118 	list_create(&zev_sig_cache.lru,
119 	            sizeof(zev_sig_cache_file_t),
120 	            offsetof(zev_sig_cache_file_t, lru_node));
121 	zev_sig_cache.max_cache_size = ZEV_CHKSUM_DEFAULT_CACHE_SIZE;
122 }
123 
124 void
125 zev_chksum_fini(void)
126 {
127 	zev_sig_cache_file_t *file;
128 
129 	mutex_destroy(&zev_sig_cache.mutex);
130 	while ((file = avl_first(&zev_sig_cache.files)) != NULL)
131 		zev_chksum_cache_file_free(file);
132 	list_destroy(&zev_sig_cache.lru);
133 	avl_destroy(&zev_sig_cache.files);
134 }
135 
136 static zev_sig_cache_file_t *
137 zev_chksum_cache_file_get_and_hold(znode_t *zp)
138 {
139 	zev_sig_cache_file_t find_file;
140 	zev_sig_cache_file_t *file;
141 	avl_index_t where;
142 
143 	find_file.guid = zp->z_zfsvfs->z_os->os_dsl_dataset->ds_phys->ds_guid;
144 	find_file.ino = zp->z_id;
145 	find_file.gen = zp->z_gen;
146 
147 	mutex_enter(&zev_sig_cache.mutex);
148 	file = avl_find(&zev_sig_cache.files, &find_file, &where);
149 	if (!file) {
150 		file = zev_alloc(sizeof(*file));
151 		file->guid =
152 		    zp->z_zfsvfs->z_os->os_dsl_dataset->ds_phys->ds_guid;
153 		file->ino = zp->z_id;
154 		file->gen = zp->z_gen;
155 		file->refcnt = 0;
156 		avl_create(&file->chksums, zev_chksum_cache_cmp,
157 		           sizeof(zev_sig_cache_chksums_t),
158 		           offsetof(zev_sig_cache_chksums_t, avl_node));
159 		list_insert_head(&zev_sig_cache.lru, file);
160 		avl_insert(&zev_sig_cache.files, file, where);
161 		zev_sig_cache.cache_size += sizeof(*file);
162 	}
163 	file->refcnt++;
164 	mutex_exit(&zev_sig_cache.mutex);
165 	return file;
166 }
167 
168 static void
169 zev_chksum_cache_file_release(zev_sig_cache_file_t *file)
170 {
171 	mutex_enter(&zev_sig_cache.mutex);
172 
173 	/* We don't invalidate/free/destroy *file. Cache expiry does that */
174 	file->refcnt--;
175 
176 	/* Move file to front of lru list */
177 	list_remove(&zev_sig_cache.lru, file);
178 	list_insert_head(&zev_sig_cache.lru, file);
179 
180 	mutex_exit(&zev_sig_cache.mutex);
181 }
182 
183 static  zev_sig_cache_chksums_t *
184 zev_chksum_cache_get_lv1_entry(zev_sig_cache_file_t *file, uint64_t off_l1)
185 {
186 	zev_sig_cache_chksums_t find_chksum;
187 	zev_sig_cache_chksums_t *cs;
188 	avl_index_t where;
189 
190 	find_chksum.offset_l1 = off_l1;
191 	cs = avl_find(&file->chksums, &find_chksum, &where);
192 	if (!cs) {
193 		cs = zev_zalloc(sizeof(*cs));
194 		cs->offset_l1 = off_l1;
195 		avl_insert(&file->chksums, cs, where);
196 		zev_sig_cache.cache_size += sizeof(*cs);
197 	}
198 	return cs;
199 }
200 
201 void
202 zev_chksum_stats(uint64_t *c_size, uint64_t *c_hits, uint64_t *c_misses)
203 {
204 	mutex_enter(&zev_sig_cache.mutex);
205 	*c_size = zev_sig_cache.cache_size;
206 	*c_hits = zev_sig_cache.hits;
207 	*c_misses = zev_sig_cache.misses;
208 	mutex_exit(&zev_sig_cache.mutex);
209 }
210 
211 static void
212 zev_chksum_cache_invalidate(zev_sig_cache_file_t *file,
213                             znode_t *zp,
214                             zev_chksum_mode_t mode,
215                             uint64_t off,
216                             uint64_t len)
217 {
218 	zev_sig_cache_chksums_t find_chksum;
219 	zev_sig_cache_chksums_t *cs;
220 	int idx;
221 	uint64_t off_l1;
222 	uint64_t len_l1;
223 	uint64_t pos_l0;
224 	uint64_t pos_l1;
225 
226 	mutex_enter(&zev_sig_cache.mutex);
227 
228 	/* start of this megabyte */
229 	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
230 
231 	if (len == 0) {
232 		/* truncate() to EOF */
233 		len_l1 = ZEV_L1_SIZE;
234 	} else {
235 		/* full megabytes */
236 		len_l1 = len + (off - off_l1);
237 		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
238 	}
239 
240 	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {
241 
242 		find_chksum.offset_l1 = pos_l1;
243 		cs = avl_find(&file->chksums, &find_chksum, NULL);
244 		if (!cs)
245 			continue;
246 
247 		for (pos_l0 = MAX(pos_l1, P2ALIGN(off, ZEV_L0_SIZE));
248 		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
249 		     pos_l0 += ZEV_L0_SIZE){
250 
251 			if ((len > 0) && (pos_l0 >= (off + len - 1)))
252 				break;
253 
254 			idx = (pos_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
255 			memcpy(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH);
256 		}
257 	}
258 
259 	if (len == 0) {
260 		/* truncate() to EOF -> invalidate all l1 sigs beyond EOF */
261 		while ((cs = avl_last(&file->chksums)) != NULL) {
262 			if (cs->offset_l1 < zp->z_size)
263 				break;
264 			avl_remove(&file->chksums, cs);
265 			zev_sig_cache.cache_size -= sizeof(*cs);
266 			zev_free(cs, sizeof(*cs));
267 		}
268 	}
269 
270 	mutex_exit(&zev_sig_cache.mutex);
271 }
272 
273 static int
274 zev_chksum_cache_get(uint8_t *dst,
275                      zev_sig_cache_file_t *file,
276                      zev_sig_cache_chksums_t *cs,
277                      uint64_t off_l0)
278 {
279 	int idx;
280 
281 	mutex_enter(&zev_sig_cache.mutex);
282 
283 	idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
284 	if (!memcmp(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH)) {
285 		zev_sig_cache.misses++;
286 		mutex_exit(&zev_sig_cache.mutex);
287 		return ENOENT;
288 	}
289 	memcpy(dst, cs->sigs[idx], SHA1_DIGEST_LENGTH);
290 	zev_sig_cache.hits++;
291 
292 	mutex_exit(&zev_sig_cache.mutex);
293 	return 0;
294 }
295 
296 static void
297 zev_chksum_cache_put(uint8_t *sig,
298                      zev_sig_cache_file_t *file,
299                      zev_sig_cache_chksums_t *cs,
300                      uint64_t off_l0)
301 {
302 	zev_sig_cache_file_t *f;
303 	zev_sig_cache_file_t *tmp;
304 	int idx;
305 
306 	mutex_enter(&zev_sig_cache.mutex);
307 
308 	if (zev_sig_cache.max_cache_size == 0) {
309 		/* cache disabled */
310 		mutex_exit(&zev_sig_cache.mutex);
311 		return;
312 	}
313 
314 	/* expire entries until there's room in the cache */
315 	f = list_tail(&zev_sig_cache.lru);
316 	while (f && (zev_sig_cache.cache_size > zev_sig_cache.max_cache_size)){
317 		tmp = f;
318 		f = list_prev(&zev_sig_cache.lru, f);
319 		if (tmp->refcnt == 0)
320 			zev_chksum_cache_file_free(tmp);
321 	}
322 
323 	idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
324 	memcpy(cs->sigs[idx], sig, SHA1_DIGEST_LENGTH);
325 
326 	mutex_exit(&zev_sig_cache.mutex);
327 	return;
328 }
329 
/* verbatim from zfs_vnops.c (unfortunatly it's declared static, there) */
static int
mappedread(vnode_t *vp, int nbytes, uio_t *uio)
{
	znode_t *zp = VTOZ(vp);
	objset_t *os = zp->z_zfsvfs->z_os;
	int64_t	start, off;
	int len = nbytes;
	int error = 0;

	start = uio->uio_loffset;
	off = start & PAGEOFFSET;
	/* walk the range page by page, preferring cached pages */
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		page_t *pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		/* assignment-in-condition kept as-is: verbatim upstream copy */
		if (pp = page_lookup(vp, start, SE_SHARED)) {
			caddr_t va;

			/* page is cached: copy straight from the mapping */
			va = zfs_map_page(pp, S_READ);
			error = uiomove(va + off, bytes, UIO_READ, uio);
			zfs_unmap_page(pp, va);
			page_unlock(pp);
		} else {
			/* not cached: read through the DMU */
			error = dmu_read_uio(os, zp->z_id, uio, bytes);
		}
		len -= bytes;
		off = 0;	/* only the first page may start mid-page */
		if (error)
			break;
	}
	return (error);
}
363 
364 static int
365 zev_safe_read(znode_t *zp, char *buf, uint64_t off, uint64_t len)
366 {
367 	uio_t		uio;
368 	struct iovec	iov;
369 	ssize_t		n;
370 	ssize_t		nbytes;
371 	int		error = 0;
372 	vnode_t		*vp = ZTOV(zp);
373 	objset_t	*os = zp->z_zfsvfs->z_os;
374 
375 	/* set up uio */
376 
377 	iov.iov_base = buf;
378 	iov.iov_len = ZEV_L0_SIZE;
379 
380 	uio.uio_iov = &iov;
381 	uio.uio_iovcnt = 1;
382 	uio.uio_segflg = (short)UIO_SYSSPACE;
383 	uio.uio_llimit = RLIM64_INFINITY;
384 	uio.uio_fmode = FREAD;
385 	uio.uio_extflg = UIO_COPY_DEFAULT;
386 
387 	uio.uio_loffset = off;
388 	uio.uio_resid = len;
389 
390 again:
391 	if (uio.uio_loffset >= zp->z_size)
392 		return EINVAL;
393 
394 	/* don't read past EOF */
395 	n = MIN(uio.uio_resid, zp->z_size - uio.uio_loffset);
396 
397 	/* this block was essentially copied from zfs_read() in zfs_vnops.c */
398 	while (n > 0) {
399 		nbytes = MIN(n, zfs_read_chunk_size -
400 		    P2PHASE(uio.uio_loffset, zfs_read_chunk_size));
401 
402 		if (vn_has_cached_data(vp)) {
403 			error = mappedread(vp, nbytes, &uio);
404 		} else {
405 			error = dmu_read_uio(os, zp->z_id, &uio, nbytes);
406 		}
407 		if (error) {
408 			if (error = EINTR)
409 				goto again;
410 			/* convert checksum errors into IO errors */
411 			if (error == ECKSUM)
412 				error = SET_ERROR(EIO);
413 			break;
414 		}
415 
416 		n -= nbytes;
417 	}
418 
419 	if (error)
420 		return error;
421 	return len - uio.uio_resid;
422 }
423 
424 static void
425 zev_l0_sig(uint8_t *sig, char *buf)
426 {
427 	SHA1_CTX	ctx;
428 
429 	SHA1Init(&ctx);
430 	SHA1Update(&ctx, buf, ZEV_L0_SIZE);
431 	SHA1Final(sig, &ctx);
432 	return;
433 }
434 
435 static void
436 zev_l0_blocksig(uint8_t *blk_sig, uint8_t *l0_sig, uint8_t block_no)
437 {
438 	SHA1_CTX	ctx;
439 
440 	SHA1Init(&ctx);
441 	SHA1Update(&ctx, l0_sig, SHA1_DIGEST_LENGTH);
442 	SHA1Update(&ctx, &block_no, sizeof(block_no));
443 	SHA1Final(blk_sig, &ctx);
444 	return;
445 }
446 
447 static void
448 zev_l1_add(uint8_t *sig_l1, uint8_t *sig_l0)
449 {
450 	int	i;
451 	int	s;
452 	int	carry = 0;
453 
454 	for (i = SHA1_DIGEST_LENGTH - 1; i >= 0; --i) {
455 		s = sig_l1[i] + sig_l0[i] + carry;
456 		carry = s > 255 ? 1 : 0;
457 		sig_l1[i] = s & 0xff;
458 	}
459 }
460 
461 static int
462 zev_get_result_buffer(zev_sig_t **buffer,
463                       uint64_t *buffer_len,
464                       uint64_t max_buffer_len,
465                       znode_t *zp,
466                       uint64_t off,
467                       uint64_t len,
468                       zev_chksum_mode_t mode)
469 {
470 	uint64_t	blk_start;
471 	uint64_t	blk_end;
472 	uint64_t	l0_blocks;
473 	uint64_t	l1_blocks;
474 	uint64_t	sigs;
475 	int buflen;
476 
477 	/* calculate result set size: how many checksums will we provide? */
478 
479 	ASSERT(len > 0 || (mode == zev_truncate && len == 0));
480 
481 	if (len == 0) {
482 		/* truncate */
483 		l0_blocks = ((off % ZEV_L0_SIZE) == 0) ? 0 : 1;
484 		l1_blocks = ((off % ZEV_L1_SIZE) == 0) ? 0 : 1;
485 	} else {
486 		/* how many lv1 checksums do we update? */
487 		blk_start = off / ZEV_L1_SIZE;
488 		blk_end = (off + len - 1) / ZEV_L1_SIZE;
489 		l1_blocks = blk_end - blk_start + 1;
490 		/* how many lv0 checksums do we update? */
491 		blk_start = off / ZEV_L0_SIZE;
492 		blk_end = (off + len - 1) / ZEV_L0_SIZE;
493 		l0_blocks = blk_end - blk_start + 1;
494 	}
495 
496 	sigs = l1_blocks + l0_blocks;
497 	if (sigs == 0) {
498 		*buffer = NULL;
499 		*buffer_len = 0;
500 		return 0;
501 	}
502 
503 	buflen = sigs * sizeof(zev_sig_t);
504 	if (max_buffer_len && (buflen > max_buffer_len)) {
505 		*buffer = NULL;
506 		*buffer_len = 0;
507 		return ENOSPC;
508 	}
509 	*buffer_len = buflen;
510 	*buffer = zev_alloc(buflen);
511 	return 0;
512 }
513 
514 static void
515 zev_append_sig(zev_sig_t *s, int level, uint64_t off, uint8_t *sig)
516 {
517 	s->level = level;
518 	s->block_offset = off;
519 	memcpy(s->value, sig, SHA1_DIGEST_LENGTH);
520 }
521 
/*
 * Calculate all l0 and l1 checksums that are affected by the given range.
 *
 * This function assumes that the ranges it needs to read are already
 * range-locked.
 *
 * On success returns 0 with *result/*result_buf_len/*signature_cnt
 * filled in (result may be NULL when no signatures apply); returns
 * ENOSPC when the result would exceed max_result_len, or a negative
 * value from zev_safe_read() on read failure.  The caller frees
 * *result via zev_free(*result, *result_buf_len).
 */
int
zev_get_checksums(zev_sig_t **result,
                  uint64_t *result_buf_len,
                  uint64_t *signature_cnt,
                  uint64_t max_result_len,
                  znode_t *zp,
                  uint64_t off,
                  uint64_t len,
                  zev_chksum_mode_t mode)
{
	uint64_t	off_l1;
	uint64_t	len_l1;
	uint64_t	pos_l1;
	uint64_t	pos_l0;
	char		*buf;
	int64_t		ret;
	uint8_t		sig_l0[SHA1_DIGEST_LENGTH];
	uint8_t		blk_sig_l0[SHA1_DIGEST_LENGTH];
	uint8_t		sig_l1[SHA1_DIGEST_LENGTH];
	uint8_t		l0_block_no;
	zev_sig_t	*sig;
	int		non_empty_l0_blocks;
	zev_sig_cache_file_t *file;
	zev_sig_cache_chksums_t *cs;

	/*
	 * Note: for write events, the callback is called via
	 *    zfs_write() -> zfs_log_write() -> zev_znode_write_cb()
	 *
	 * The transaction is not commited, yet.
	 *
	 * A write() syscall might be split into smaller chunks by zfs_write()
	 *
	 * zfs_write() has a range lock when this is called. (zfs_vnops.c:925)
	 * In zev mode, the range lock will encompass all data we need
	 * to calculate our checksums.
	 *
	 * The same is true for truncates with non-zero length. ("punch hole")
	 */

	ASSERT(len > 0 || (mode == zev_truncate && len == 0));
	*signature_cnt = 0;

	/* start of this megabyte */
	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
	/* full megabytes */
	if (len == 0) {
		/* truncate(): we'll look at the last lv1 block, only. */
		len_l1 = ZEV_L1_SIZE;
	} else {
		len_l1 = len + (off - off_l1);
		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
	}

	/* hold the per-file cache entry; drop sigs we're about to redo */
	file = zev_chksum_cache_file_get_and_hold(zp);
	zev_chksum_cache_invalidate(file, zp, mode, off, len);
	buf = zev_alloc(ZEV_L0_SIZE);

	ret = zev_get_result_buffer(result, result_buf_len, max_result_len,
	                            zp, off, len, mode);
	if (ret) {
		zev_free(buf, ZEV_L0_SIZE);
		zev_chksum_cache_file_release(file);
		return ret;
	}
	if (*result == NULL) {
		/* we're done */
		zev_free(buf, ZEV_L0_SIZE);
		zev_chksum_cache_file_release(file);
		return 0;
	}
	sig = *result;

	/* one lv1 region ("megabyte") at a time */
	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {

		if (pos_l1 > zp->z_size) {
			cmn_err(CE_WARN, "zev_get_checksums: off+len beyond "
			        "EOF.  Unexpected behaviour; please fix!");
			break;
		}

		/*
		 * Since we have a reference to 'file' 'cs' can't be expired.
		 * Since our ranges are range locked, other threads woun't
		 * touch our checksum entries. (not even read them)
		 * Hence, we don't need to hold() or release() 'cs'.
		 */
		cs = zev_chksum_cache_get_lv1_entry(file, pos_l1);

		l0_block_no = 0;
		non_empty_l0_blocks = 0;
		bzero(sig_l1, sizeof(sig_l1));
		/* every lv0 block in this lv1 region, cached or computed */
		for (pos_l0 = pos_l1;
		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
		     pos_l0 += ZEV_L0_SIZE){

			if (pos_l0 >= zp->z_size)
				break;	/* EOF */

			if (zev_chksum_cache_get(sig_l0, file,cs,pos_l0) != 0) {

				/* signature is not cached, yet. */
				ret = zev_safe_read(zp, buf,
				                    pos_l0, ZEV_L0_SIZE);
				if (ret < 0) {
					zev_free(*result, *result_buf_len);
					zev_free(buf, ZEV_L0_SIZE);
					zev_chksum_cache_file_release(file);
					return ret;
				}
				/* pad buffer with zeros if necessary */
				if (ret < ZEV_L0_SIZE)
					bzero(buf + ret, ZEV_L0_SIZE - ret);

				/* calculate signature */
				zev_l0_sig(sig_l0, buf);

				zev_chksum_cache_put(sig_l0, file, cs, pos_l0);
			}

			if (!memcmp(sig_l0, all_zero_sig, SHA1_DIGEST_LENGTH)) {
				/* all-zero l0 block.  omit signature. */
				l0_block_no++;
				continue;
			}
			non_empty_l0_blocks++;
			zev_l0_blocksig(blk_sig_l0, sig_l0, l0_block_no);
			zev_l1_add(sig_l1, blk_sig_l0);

			/*
			 * NOTE(review): "< off + len - 1" looks off by
			 * one -- a block starting exactly at the last
			 * written byte is skipped, and for len == 0
			 * "off + len - 1" underflows.  Confirm intent
			 * before changing; result-buffer sizing in
			 * zev_get_result_buffer() counts that block.
			 */
			if (((pos_l0 + ZEV_L0_SIZE - 1) >= off) &&
			    (pos_l0 < (off + len - 1))) {
				zev_append_sig(sig++, 0, pos_l0, sig_l0);
			}

			l0_block_no++;
		}

		/* emit the lv1 sum unless the file fits in a single lv0 */
		if (non_empty_l0_blocks && (zp->z_size > ZEV_L0_SIZE))
			zev_append_sig(sig++, 1, pos_l1, sig_l1);
	}

	/* actual number of signatures written, not the allocated count */
	*signature_cnt = ((char *)sig - (char *)*result) / sizeof(zev_sig_t);

	zev_free(buf, ZEV_L0_SIZE);
	zev_chksum_cache_file_release(file);
	return 0;
}
675 
/*
 * ZEV_IOC_GET_SIGNATURES handler: compute lv0/lv1 checksums for a byte
 * range of an open zfs regular file and copy them to userland.  'arg'
 * points to a zev_ioctl_get_signatures_t header, immediately followed
 * in the user buffer by space for the signature array.
 * Returns 0 or a positive errno.
 */
int
zev_ioc_get_signatures(intptr_t arg, int mode)
{
	zev_ioctl_get_signatures_t gs;
	file_t *fp;
	int ret = 0;
	znode_t *zp;
	zev_sig_t *sig_buf = NULL;
	uint64_t sig_buf_len;
	uint64_t sig_cnt = 0;
	uint64_t sig_len;
	char *dst;
	int range_locked = 0;
	rl_t *rl;
	ssize_t	lock_off;
	ssize_t lock_len;

	/* fetch the request header from userland */
	if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
		return EFAULT;
	fp = getf(gs.zev_fd);
	if (fp == NULL)
		return EBADF;
	/* only plain regular files on zfs can have checksums */
	if (fp->f_vnode->v_vfsp->vfs_fstype != zfsfstype) {
		ret = EINVAL;
		goto out;
	}
	if (fp->f_vnode->v_type != VREG) {
		ret = EINVAL;
		goto out;
	}
	zp = VTOZ(fp->f_vnode);
	if (gs.zev_offset >= zp->z_size) {
		ret = EINVAL;
		goto out;
	}

	/* range lock data */
	lock_off = P2ALIGN(gs.zev_offset, ZEV_L1_SIZE);
	lock_len = gs.zev_len + (gs.zev_offset - lock_off);
	lock_len = P2ROUNDUP(lock_len, ZEV_L1_SIZE);
	rl = zfs_range_lock(zp, lock_off, lock_len, RL_READER);
	range_locked = 1;

	/*
	 * get checksums
	 * NOTE(review): gs.zev_len == 0 reaches zev_get_checksums() in
	 * zev_write mode, which ASSERTs len > 0 -- confirm callers can
	 * never pass a zero length here.
	 */
	ret = zev_get_checksums(&sig_buf, &sig_buf_len, &sig_cnt,
	                        gs.zev_bufsize,
	                        zp, gs.zev_offset, gs.zev_len, zev_write);
	if (ret)
		goto out;

	/* copy to userland: header first, then the signature array */
	sig_len = sig_cnt * sizeof(zev_sig_t);
	gs.zev_signature_cnt = sig_cnt;
	if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) {
		ret = EFAULT;
		goto out;
	}
	if (sig_cnt && sig_buf) {
		dst = (char *)arg + sizeof(gs);
		if (ddi_copyout(sig_buf, (void *)dst, sig_len, mode) != 0) {
			ret = EFAULT;
			goto out;
		}
	}
out:
	/* common cleanup for all exit paths */
	if (sig_buf)
		zev_free(sig_buf, sig_buf_len);
	if (range_locked)
		zfs_range_unlock(rl);
	releasef(gs.zev_fd);
	return ret;
}
748 
749