xref: /titanic_41/usr/src/uts/common/fs/zev/zev_checksums.c (revision de5d7d2ae3b023a958df7ca3bb8d40aa9638fb30)
1d8e62babSAndreas Jaekel #include <sys/zfs_events.h>
2d8e62babSAndreas Jaekel #include <sys/zev_checksums.h>
3d8e62babSAndreas Jaekel #include <sys/fs/zev.h>
4d8e62babSAndreas Jaekel #include <sys/zfs_znode.h>
5d8e62babSAndreas Jaekel #include <sys/sha1.h>
6d8e62babSAndreas Jaekel #include <sys/avl.h>
7d8e62babSAndreas Jaekel #include <sys/sysmacros.h>
8d8e62babSAndreas Jaekel #include <sys/fs/zev.h>
96b4c2eb9SAndreas Jaekel #include <sys/zfs_rlock.h>
1086d83651SAndreas Jaekel #include <sys/list.h>
11d8e62babSAndreas Jaekel 
12d8e62babSAndreas Jaekel typedef struct zev_sig_cache_chksums_t {
13d8e62babSAndreas Jaekel 	/* begin of key */
14d8e62babSAndreas Jaekel 	uint64_t			offset_l1;
15d8e62babSAndreas Jaekel 	/* end of key */
16d8e62babSAndreas Jaekel 	avl_node_t			avl_node;
17d8e62babSAndreas Jaekel 	uint8_t		sigs[ZEV_L1_SIZE/ZEV_L0_SIZE][SHA1_DIGEST_LENGTH];
18d8e62babSAndreas Jaekel } zev_sig_cache_chksums_t;
19d8e62babSAndreas Jaekel 
20d8e62babSAndreas Jaekel typedef struct zev_sig_cache_file_t {
21d8e62babSAndreas Jaekel 	/* begin of key */
22d8e62babSAndreas Jaekel 	uint64_t			guid;
23d8e62babSAndreas Jaekel 	uint64_t			ino;
24d8e62babSAndreas Jaekel 	uint64_t			gen;
25d8e62babSAndreas Jaekel 	/* end of key */
26d8e62babSAndreas Jaekel 	uint32_t			refcnt;
2786d83651SAndreas Jaekel 	list_node_t			lru_node;
28d8e62babSAndreas Jaekel 	avl_node_t			avl_node;
29d8e62babSAndreas Jaekel 	avl_tree_t			chksums;
30d8e62babSAndreas Jaekel } zev_sig_cache_file_t;
31d8e62babSAndreas Jaekel 
32d8e62babSAndreas Jaekel typedef struct zev_sig_cache_t {
33d8e62babSAndreas Jaekel 	kmutex_t			mutex;
34d8e62babSAndreas Jaekel 	uint64_t			cache_size;
35d8e62babSAndreas Jaekel 	uint64_t			max_cache_size;
36d8e62babSAndreas Jaekel 	uint64_t			hits;
37d8e62babSAndreas Jaekel 	uint64_t			misses;
3886d83651SAndreas Jaekel 	list_t				lru;
39d8e62babSAndreas Jaekel 	avl_tree_t			files;
40d8e62babSAndreas Jaekel } zev_sig_cache_t;
41d8e62babSAndreas Jaekel 
42d8e62babSAndreas Jaekel extern offset_t zfs_read_chunk_size;	/* tuneable from zfs_vnops.c */
43d8e62babSAndreas Jaekel 
44d8e62babSAndreas Jaekel static uint8_t all_zero_sig[SHA1_DIGEST_LENGTH] = {
45d8e62babSAndreas Jaekel 	0x1c, 0xea, 0xf7, 0x3d, 0xf4, 0x0e, 0x53, 0x1d, 0xf3, 0xbf,
46d8e62babSAndreas Jaekel 	0xb2, 0x6b, 0x4f, 0xb7, 0xcd, 0x95, 0xfb, 0x7b, 0xff, 0x1d
47d8e62babSAndreas Jaekel };
48d8e62babSAndreas Jaekel 
49d8e62babSAndreas Jaekel static uint8_t unknown_sig[SHA1_DIGEST_LENGTH] = {
50d8e62babSAndreas Jaekel 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
51d8e62babSAndreas Jaekel 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
52d8e62babSAndreas Jaekel };
53d8e62babSAndreas Jaekel 
54d8e62babSAndreas Jaekel static zev_sig_cache_t	zev_sig_cache;
55d8e62babSAndreas Jaekel 
56d8e62babSAndreas Jaekel static int
zev_cache_file_cmp(const void * entry_a,const void * entry_b)57d8e62babSAndreas Jaekel zev_cache_file_cmp(const void *entry_a, const void *entry_b)
58d8e62babSAndreas Jaekel {
59d8e62babSAndreas Jaekel 	const zev_sig_cache_file_t *a = entry_a;
60d8e62babSAndreas Jaekel 	const zev_sig_cache_file_t *b = entry_b;
61d8e62babSAndreas Jaekel 
62d8e62babSAndreas Jaekel 	if (a->guid < b->guid)
63d8e62babSAndreas Jaekel 		return -1;
64d8e62babSAndreas Jaekel 	if (a->guid > b->guid)
65d8e62babSAndreas Jaekel 		return 1;
66d8e62babSAndreas Jaekel 	if (a->ino < b->ino)
67d8e62babSAndreas Jaekel 		return -1;
68d8e62babSAndreas Jaekel 	if (a->ino > b->ino)
69d8e62babSAndreas Jaekel 		return 1;
70d8e62babSAndreas Jaekel 	if (a->gen < b->gen)
71d8e62babSAndreas Jaekel 		return -1;
72d8e62babSAndreas Jaekel 	if (a->gen > b->gen)
73d8e62babSAndreas Jaekel 		return 1;
74d8e62babSAndreas Jaekel 	return 0;
75d8e62babSAndreas Jaekel }
76d8e62babSAndreas Jaekel 
77d8e62babSAndreas Jaekel static int
zev_chksum_cache_cmp(const void * entry_a,const void * entry_b)78d8e62babSAndreas Jaekel zev_chksum_cache_cmp(const void *entry_a, const void *entry_b)
79d8e62babSAndreas Jaekel {
80d8e62babSAndreas Jaekel 	const zev_sig_cache_chksums_t *a = entry_a;
81d8e62babSAndreas Jaekel 	const zev_sig_cache_chksums_t *b = entry_b;
82d8e62babSAndreas Jaekel 
83d8e62babSAndreas Jaekel 	if (a->offset_l1 < b->offset_l1)
84d8e62babSAndreas Jaekel 		return -1;
85d8e62babSAndreas Jaekel 	if (a->offset_l1 > b->offset_l1)
86d8e62babSAndreas Jaekel 		return 1;
87d8e62babSAndreas Jaekel 	return 0;
88d8e62babSAndreas Jaekel }
89d8e62babSAndreas Jaekel 
90d8e62babSAndreas Jaekel /* must be called with zev_sig_cache.mutex held */
91d8e62babSAndreas Jaekel static void
zev_chksum_cache_file_free(zev_sig_cache_file_t * file)92d8e62babSAndreas Jaekel zev_chksum_cache_file_free(zev_sig_cache_file_t *file)
93d8e62babSAndreas Jaekel {
94d8e62babSAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
95d8e62babSAndreas Jaekel 	void *c = NULL; /* cookie */
96d8e62babSAndreas Jaekel 
97d8e62babSAndreas Jaekel 	/* remove from lru list */
9886d83651SAndreas Jaekel 	list_remove(&zev_sig_cache.lru, file);
99d8e62babSAndreas Jaekel 	/* free resources */
100d8e62babSAndreas Jaekel 	avl_remove(&zev_sig_cache.files, file);
101d8e62babSAndreas Jaekel 	while ((cs = avl_destroy_nodes(&file->chksums, &c)) != NULL) {
102d8e62babSAndreas Jaekel 		zev_sig_cache.cache_size -= sizeof(*cs);
103d8e62babSAndreas Jaekel 		zev_free(cs, sizeof(*cs));
104d8e62babSAndreas Jaekel 	}
105d8e62babSAndreas Jaekel 	avl_destroy(&file->chksums);
106d8e62babSAndreas Jaekel 	zev_free(file, sizeof(*file));
107d8e62babSAndreas Jaekel 	zev_sig_cache.cache_size -= sizeof(*file);
108d8e62babSAndreas Jaekel }
109d8e62babSAndreas Jaekel 
110d8e62babSAndreas Jaekel void
zev_chksum_init(void)111d8e62babSAndreas Jaekel zev_chksum_init(void)
112d8e62babSAndreas Jaekel {
113d8e62babSAndreas Jaekel 	memset(&zev_sig_cache, 0, sizeof(zev_sig_cache));
114d8e62babSAndreas Jaekel 	mutex_init(&zev_sig_cache.mutex, NULL, MUTEX_DRIVER, NULL);
115d8e62babSAndreas Jaekel 	avl_create(&zev_sig_cache.files, zev_cache_file_cmp,
116d8e62babSAndreas Jaekel 	           sizeof(zev_sig_cache_file_t),
117d8e62babSAndreas Jaekel 	           offsetof(zev_sig_cache_file_t, avl_node));
11886d83651SAndreas Jaekel 	list_create(&zev_sig_cache.lru,
11986d83651SAndreas Jaekel 	            sizeof(zev_sig_cache_file_t),
12086d83651SAndreas Jaekel 	            offsetof(zev_sig_cache_file_t, lru_node));
121d8e62babSAndreas Jaekel 	zev_sig_cache.max_cache_size = ZEV_CHKSUM_DEFAULT_CACHE_SIZE;
122d8e62babSAndreas Jaekel }
123d8e62babSAndreas Jaekel 
124d8e62babSAndreas Jaekel void
zev_chksum_fini(void)125d8e62babSAndreas Jaekel zev_chksum_fini(void)
126d8e62babSAndreas Jaekel {
127d8e62babSAndreas Jaekel 	zev_sig_cache_file_t *file;
128d8e62babSAndreas Jaekel 
129d8e62babSAndreas Jaekel 	mutex_destroy(&zev_sig_cache.mutex);
130d8e62babSAndreas Jaekel 	while ((file = avl_first(&zev_sig_cache.files)) != NULL)
131d8e62babSAndreas Jaekel 		zev_chksum_cache_file_free(file);
13286d83651SAndreas Jaekel 	list_destroy(&zev_sig_cache.lru);
133d8e62babSAndreas Jaekel 	avl_destroy(&zev_sig_cache.files);
134d8e62babSAndreas Jaekel }
135d8e62babSAndreas Jaekel 
136d8e62babSAndreas Jaekel static zev_sig_cache_file_t *
zev_chksum_cache_file_get_and_hold(znode_t * zp)137d8e62babSAndreas Jaekel zev_chksum_cache_file_get_and_hold(znode_t *zp)
138d8e62babSAndreas Jaekel {
139d8e62babSAndreas Jaekel 	zev_sig_cache_file_t find_file;
140d8e62babSAndreas Jaekel 	zev_sig_cache_file_t *file;
141d8e62babSAndreas Jaekel 	avl_index_t where;
142d8e62babSAndreas Jaekel 
1430358d6fdSSimon Klinkert 	find_file.guid =
1440358d6fdSSimon Klinkert 		dsl_dataset_phys(zp->z_zfsvfs->z_os->os_dsl_dataset)->ds_guid;
145d8e62babSAndreas Jaekel 	find_file.ino = zp->z_id;
146d8e62babSAndreas Jaekel 	find_file.gen = zp->z_gen;
147d8e62babSAndreas Jaekel 
148d8e62babSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
149d8e62babSAndreas Jaekel 	file = avl_find(&zev_sig_cache.files, &find_file, &where);
150d8e62babSAndreas Jaekel 	if (!file) {
151d8e62babSAndreas Jaekel 		file = zev_alloc(sizeof(*file));
152d8e62babSAndreas Jaekel 		file->guid =
1530358d6fdSSimon Klinkert 		    dsl_dataset_phys(zp->z_zfsvfs->z_os->os_dsl_dataset)->ds_guid;
154d8e62babSAndreas Jaekel 		file->ino = zp->z_id;
155d8e62babSAndreas Jaekel 		file->gen = zp->z_gen;
156d8e62babSAndreas Jaekel 		file->refcnt = 0;
157d8e62babSAndreas Jaekel 		avl_create(&file->chksums, zev_chksum_cache_cmp,
158d8e62babSAndreas Jaekel 		           sizeof(zev_sig_cache_chksums_t),
159d8e62babSAndreas Jaekel 		           offsetof(zev_sig_cache_chksums_t, avl_node));
16086d83651SAndreas Jaekel 		list_insert_head(&zev_sig_cache.lru, file);
161d8e62babSAndreas Jaekel 		avl_insert(&zev_sig_cache.files, file, where);
162d8e62babSAndreas Jaekel 		zev_sig_cache.cache_size += sizeof(*file);
163d8e62babSAndreas Jaekel 	}
164d8e62babSAndreas Jaekel 	file->refcnt++;
165d8e62babSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
166d8e62babSAndreas Jaekel 	return file;
167d8e62babSAndreas Jaekel }
168d8e62babSAndreas Jaekel 
169d8e62babSAndreas Jaekel static void
zev_chksum_cache_file_release(zev_sig_cache_file_t * file)170d8e62babSAndreas Jaekel zev_chksum_cache_file_release(zev_sig_cache_file_t *file)
171d8e62babSAndreas Jaekel {
172d8e62babSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
173d8e62babSAndreas Jaekel 
174d8e62babSAndreas Jaekel 	/* We don't invalidate/free/destroy *file. Cache expiry does that */
175d8e62babSAndreas Jaekel 	file->refcnt--;
176d8e62babSAndreas Jaekel 
177d8e62babSAndreas Jaekel 	/* Move file to front of lru list */
17886d83651SAndreas Jaekel 	list_remove(&zev_sig_cache.lru, file);
17986d83651SAndreas Jaekel 	list_insert_head(&zev_sig_cache.lru, file);
180d8e62babSAndreas Jaekel 
181d8e62babSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
182d8e62babSAndreas Jaekel }
183d8e62babSAndreas Jaekel 
184d8e62babSAndreas Jaekel static  zev_sig_cache_chksums_t *
zev_chksum_cache_get_lv1_entry(zev_sig_cache_file_t * file,uint64_t off_l1)185d8e62babSAndreas Jaekel zev_chksum_cache_get_lv1_entry(zev_sig_cache_file_t *file, uint64_t off_l1)
186d8e62babSAndreas Jaekel {
187d8e62babSAndreas Jaekel 	zev_sig_cache_chksums_t find_chksum;
188d8e62babSAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
189d8e62babSAndreas Jaekel 	avl_index_t where;
190d8e62babSAndreas Jaekel 
19133d4c348SAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
19233d4c348SAndreas Jaekel 
193d8e62babSAndreas Jaekel 	find_chksum.offset_l1 = off_l1;
194d8e62babSAndreas Jaekel 	cs = avl_find(&file->chksums, &find_chksum, &where);
195d8e62babSAndreas Jaekel 	if (!cs) {
196d8e62babSAndreas Jaekel 		cs = zev_zalloc(sizeof(*cs));
197d8e62babSAndreas Jaekel 		cs->offset_l1 = off_l1;
198d8e62babSAndreas Jaekel 		avl_insert(&file->chksums, cs, where);
199d8e62babSAndreas Jaekel 		zev_sig_cache.cache_size += sizeof(*cs);
200d8e62babSAndreas Jaekel 	}
20133d4c348SAndreas Jaekel 
20233d4c348SAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
20333d4c348SAndreas Jaekel 
204d8e62babSAndreas Jaekel 	return cs;
205d8e62babSAndreas Jaekel }
206d8e62babSAndreas Jaekel 
207d8e62babSAndreas Jaekel void
zev_chksum_stats(uint64_t * c_size,uint64_t * c_hits,uint64_t * c_misses)208d8e62babSAndreas Jaekel zev_chksum_stats(uint64_t *c_size, uint64_t *c_hits, uint64_t *c_misses)
209d8e62babSAndreas Jaekel {
210d8e62babSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
211d8e62babSAndreas Jaekel 	*c_size = zev_sig_cache.cache_size;
212d8e62babSAndreas Jaekel 	*c_hits = zev_sig_cache.hits;
213d8e62babSAndreas Jaekel 	*c_misses = zev_sig_cache.misses;
214d8e62babSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
215d8e62babSAndreas Jaekel }
216d8e62babSAndreas Jaekel 
217d8e62babSAndreas Jaekel static void
zev_chksum_cache_invalidate(zev_sig_cache_file_t * file,znode_t * zp,zev_chksum_mode_t mode,uint64_t off,uint64_t len)218d8e62babSAndreas Jaekel zev_chksum_cache_invalidate(zev_sig_cache_file_t *file,
219d8e62babSAndreas Jaekel                             znode_t *zp,
220d8e62babSAndreas Jaekel                             zev_chksum_mode_t mode,
221d8e62babSAndreas Jaekel                             uint64_t off,
222d8e62babSAndreas Jaekel                             uint64_t len)
223d8e62babSAndreas Jaekel {
224d8e62babSAndreas Jaekel 	zev_sig_cache_chksums_t find_chksum;
225d8e62babSAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
226d8e62babSAndreas Jaekel 	int idx;
227d8e62babSAndreas Jaekel 	uint64_t off_l1;
228d8e62babSAndreas Jaekel 	uint64_t len_l1;
229d8e62babSAndreas Jaekel 	uint64_t pos_l0;
230d8e62babSAndreas Jaekel 	uint64_t pos_l1;
231d8e62babSAndreas Jaekel 
232d8e62babSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
233d8e62babSAndreas Jaekel 
234d8e62babSAndreas Jaekel 	/* start of this megabyte */
235d8e62babSAndreas Jaekel 	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
236d8e62babSAndreas Jaekel 
237d8e62babSAndreas Jaekel 	if (len == 0) {
238d8e62babSAndreas Jaekel 		/* truncate() to EOF */
239d8e62babSAndreas Jaekel 		len_l1 = ZEV_L1_SIZE;
240d8e62babSAndreas Jaekel 	} else {
241d8e62babSAndreas Jaekel 		/* full megabytes */
242d8e62babSAndreas Jaekel 		len_l1 = len + (off - off_l1);
243d8e62babSAndreas Jaekel 		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
244d8e62babSAndreas Jaekel 	}
245d8e62babSAndreas Jaekel 
246d8e62babSAndreas Jaekel 	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {
247d8e62babSAndreas Jaekel 
248d8e62babSAndreas Jaekel 		find_chksum.offset_l1 = pos_l1;
249d8e62babSAndreas Jaekel 		cs = avl_find(&file->chksums, &find_chksum, NULL);
250d8e62babSAndreas Jaekel 		if (!cs)
251d8e62babSAndreas Jaekel 			continue;
252d8e62babSAndreas Jaekel 
253d8e62babSAndreas Jaekel 		for (pos_l0 = MAX(pos_l1, P2ALIGN(off, ZEV_L0_SIZE));
254d8e62babSAndreas Jaekel 		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
255d8e62babSAndreas Jaekel 		     pos_l0 += ZEV_L0_SIZE){
256d8e62babSAndreas Jaekel 
257cefd41bcSAndreas Jaekel 			if ((len > 0) && (pos_l0 > (off + len - 1)))
258d8e62babSAndreas Jaekel 				break;
259d8e62babSAndreas Jaekel 
260d8e62babSAndreas Jaekel 			idx = (pos_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
261d8e62babSAndreas Jaekel 			memcpy(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH);
262d8e62babSAndreas Jaekel 		}
263d8e62babSAndreas Jaekel 	}
264d8e62babSAndreas Jaekel 
265d8e62babSAndreas Jaekel 	if (len == 0) {
266d8e62babSAndreas Jaekel 		/* truncate() to EOF -> invalidate all l1 sigs beyond EOF */
267d8e62babSAndreas Jaekel 		while ((cs = avl_last(&file->chksums)) != NULL) {
268d8e62babSAndreas Jaekel 			if (cs->offset_l1 < zp->z_size)
269d8e62babSAndreas Jaekel 				break;
270d8e62babSAndreas Jaekel 			avl_remove(&file->chksums, cs);
271d8e62babSAndreas Jaekel 			zev_sig_cache.cache_size -= sizeof(*cs);
272d8e62babSAndreas Jaekel 			zev_free(cs, sizeof(*cs));
273d8e62babSAndreas Jaekel 		}
274d8e62babSAndreas Jaekel 	}
275d8e62babSAndreas Jaekel 
276d8e62babSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
277d8e62babSAndreas Jaekel }
278d8e62babSAndreas Jaekel 
279d8e62babSAndreas Jaekel static int
zev_chksum_cache_get(uint8_t * dst,zev_sig_cache_file_t * file,zev_sig_cache_chksums_t * cs,uint64_t off_l0)280d8e62babSAndreas Jaekel zev_chksum_cache_get(uint8_t *dst,
281d8e62babSAndreas Jaekel                      zev_sig_cache_file_t *file,
282d8e62babSAndreas Jaekel                      zev_sig_cache_chksums_t *cs,
283d8e62babSAndreas Jaekel                      uint64_t off_l0)
284d8e62babSAndreas Jaekel {
285d8e62babSAndreas Jaekel 	int idx;
286d8e62babSAndreas Jaekel 
287d8e62babSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
288d8e62babSAndreas Jaekel 
289d8e62babSAndreas Jaekel 	idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
290d8e62babSAndreas Jaekel 	if (!memcmp(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH)) {
291d8e62babSAndreas Jaekel 		zev_sig_cache.misses++;
292d8e62babSAndreas Jaekel 		mutex_exit(&zev_sig_cache.mutex);
293d8e62babSAndreas Jaekel 		return ENOENT;
294d8e62babSAndreas Jaekel 	}
295d8e62babSAndreas Jaekel 	memcpy(dst, cs->sigs[idx], SHA1_DIGEST_LENGTH);
296d8e62babSAndreas Jaekel 	zev_sig_cache.hits++;
297d8e62babSAndreas Jaekel 
298d8e62babSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
299d8e62babSAndreas Jaekel 	return 0;
300d8e62babSAndreas Jaekel }
301d8e62babSAndreas Jaekel 
302d8e62babSAndreas Jaekel static void
zev_chksum_cache_put(uint8_t * sig,zev_sig_cache_file_t * file,zev_sig_cache_chksums_t * cs,uint64_t off_l0)303d8e62babSAndreas Jaekel zev_chksum_cache_put(uint8_t *sig,
304d8e62babSAndreas Jaekel                      zev_sig_cache_file_t *file,
305d8e62babSAndreas Jaekel                      zev_sig_cache_chksums_t *cs,
306d8e62babSAndreas Jaekel                      uint64_t off_l0)
307d8e62babSAndreas Jaekel {
308d8e62babSAndreas Jaekel 	zev_sig_cache_file_t *f;
3096a7145cbSAndreas Jaekel 	zev_sig_cache_file_t *tmp;
310d8e62babSAndreas Jaekel 	int idx;
311d8e62babSAndreas Jaekel 
312d8e62babSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
313d8e62babSAndreas Jaekel 
314d8e62babSAndreas Jaekel 	if (zev_sig_cache.max_cache_size == 0) {
315d8e62babSAndreas Jaekel 		/* cache disabled */
316d8e62babSAndreas Jaekel 		mutex_exit(&zev_sig_cache.mutex);
317d8e62babSAndreas Jaekel 		return;
318d8e62babSAndreas Jaekel 	}
319d8e62babSAndreas Jaekel 
320d8e62babSAndreas Jaekel 	/* expire entries until there's room in the cache */
32186d83651SAndreas Jaekel 	f = list_tail(&zev_sig_cache.lru);
3226a7145cbSAndreas Jaekel 	while (f && (zev_sig_cache.cache_size > zev_sig_cache.max_cache_size)){
3236a7145cbSAndreas Jaekel 		tmp = f;
32486d83651SAndreas Jaekel 		f = list_prev(&zev_sig_cache.lru, f);
3256a7145cbSAndreas Jaekel 		if (tmp->refcnt == 0)
3266a7145cbSAndreas Jaekel 			zev_chksum_cache_file_free(tmp);
327d8e62babSAndreas Jaekel 	}
328d8e62babSAndreas Jaekel 
329d8e62babSAndreas Jaekel 	idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
330d8e62babSAndreas Jaekel 	memcpy(cs->sigs[idx], sig, SHA1_DIGEST_LENGTH);
331d8e62babSAndreas Jaekel 
332d8e62babSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
333d8e62babSAndreas Jaekel 	return;
334d8e62babSAndreas Jaekel }
335d8e62babSAndreas Jaekel 
336d8e62babSAndreas Jaekel /* verbatim from zfs_vnops.c (unfortunatly it's declared static, there) */
337d8e62babSAndreas Jaekel static int
mappedread(vnode_t * vp,int nbytes,uio_t * uio)338d8e62babSAndreas Jaekel mappedread(vnode_t *vp, int nbytes, uio_t *uio)
339d8e62babSAndreas Jaekel {
340d8e62babSAndreas Jaekel 	znode_t *zp = VTOZ(vp);
341d8e62babSAndreas Jaekel 	objset_t *os = zp->z_zfsvfs->z_os;
342d8e62babSAndreas Jaekel 	int64_t	start, off;
343d8e62babSAndreas Jaekel 	int len = nbytes;
344d8e62babSAndreas Jaekel 	int error = 0;
345d8e62babSAndreas Jaekel 
346d8e62babSAndreas Jaekel 	start = uio->uio_loffset;
347d8e62babSAndreas Jaekel 	off = start & PAGEOFFSET;
348d8e62babSAndreas Jaekel 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
349d8e62babSAndreas Jaekel 		page_t *pp;
350d8e62babSAndreas Jaekel 		uint64_t bytes = MIN(PAGESIZE - off, len);
351d8e62babSAndreas Jaekel 
352d8e62babSAndreas Jaekel 		if (pp = page_lookup(vp, start, SE_SHARED)) {
353d8e62babSAndreas Jaekel 			caddr_t va;
354d8e62babSAndreas Jaekel 
355d8e62babSAndreas Jaekel 			va = zfs_map_page(pp, S_READ);
356d8e62babSAndreas Jaekel 			error = uiomove(va + off, bytes, UIO_READ, uio);
357d8e62babSAndreas Jaekel 			zfs_unmap_page(pp, va);
358d8e62babSAndreas Jaekel 			page_unlock(pp);
359d8e62babSAndreas Jaekel 		} else {
360d8e62babSAndreas Jaekel 			error = dmu_read_uio(os, zp->z_id, uio, bytes);
361d8e62babSAndreas Jaekel 		}
362d8e62babSAndreas Jaekel 		len -= bytes;
363d8e62babSAndreas Jaekel 		off = 0;
364d8e62babSAndreas Jaekel 		if (error)
365d8e62babSAndreas Jaekel 			break;
366d8e62babSAndreas Jaekel 	}
367d8e62babSAndreas Jaekel 	return (error);
368d8e62babSAndreas Jaekel }
369d8e62babSAndreas Jaekel 
370d8e62babSAndreas Jaekel static int
zev_safe_read(znode_t * zp,char * buf,uint64_t off,uint64_t len)371d8e62babSAndreas Jaekel zev_safe_read(znode_t *zp, char *buf, uint64_t off, uint64_t len)
372d8e62babSAndreas Jaekel {
373d8e62babSAndreas Jaekel 	uio_t		uio;
374d8e62babSAndreas Jaekel 	struct iovec	iov;
375d8e62babSAndreas Jaekel 	ssize_t		n;
376d8e62babSAndreas Jaekel 	ssize_t		nbytes;
377d8e62babSAndreas Jaekel 	int		error = 0;
378d8e62babSAndreas Jaekel 	vnode_t		*vp = ZTOV(zp);
379d8e62babSAndreas Jaekel 	objset_t	*os = zp->z_zfsvfs->z_os;
380d8e62babSAndreas Jaekel 
381d8e62babSAndreas Jaekel 	/* set up uio */
382d8e62babSAndreas Jaekel 
383d8e62babSAndreas Jaekel 	iov.iov_base = buf;
384d8e62babSAndreas Jaekel 	iov.iov_len = ZEV_L0_SIZE;
385d8e62babSAndreas Jaekel 
386d8e62babSAndreas Jaekel 	uio.uio_iov = &iov;
387d8e62babSAndreas Jaekel 	uio.uio_iovcnt = 1;
388d8e62babSAndreas Jaekel 	uio.uio_segflg = (short)UIO_SYSSPACE;
389d8e62babSAndreas Jaekel 	uio.uio_llimit = RLIM64_INFINITY;
390d8e62babSAndreas Jaekel 	uio.uio_fmode = FREAD;
391d8e62babSAndreas Jaekel 	uio.uio_extflg = UIO_COPY_DEFAULT;
392d8e62babSAndreas Jaekel 
393d8e62babSAndreas Jaekel 	uio.uio_loffset = off;
394d8e62babSAndreas Jaekel 	uio.uio_resid = len;
395d8e62babSAndreas Jaekel 
396d8e62babSAndreas Jaekel again:
397d8e62babSAndreas Jaekel 	if (uio.uio_loffset >= zp->z_size)
398d8e62babSAndreas Jaekel 		return EINVAL;
399d8e62babSAndreas Jaekel 
400d8e62babSAndreas Jaekel 	/* don't read past EOF */
401d8e62babSAndreas Jaekel 	n = MIN(uio.uio_resid, zp->z_size - uio.uio_loffset);
402d8e62babSAndreas Jaekel 
403d8e62babSAndreas Jaekel 	/* this block was essentially copied from zfs_read() in zfs_vnops.c */
404d8e62babSAndreas Jaekel 	while (n > 0) {
405d8e62babSAndreas Jaekel 		nbytes = MIN(n, zfs_read_chunk_size -
406d8e62babSAndreas Jaekel 		    P2PHASE(uio.uio_loffset, zfs_read_chunk_size));
407d8e62babSAndreas Jaekel 
408d8e62babSAndreas Jaekel 		if (vn_has_cached_data(vp)) {
409d8e62babSAndreas Jaekel 			error = mappedread(vp, nbytes, &uio);
410d8e62babSAndreas Jaekel 		} else {
411d8e62babSAndreas Jaekel 			error = dmu_read_uio(os, zp->z_id, &uio, nbytes);
412d8e62babSAndreas Jaekel 		}
413d8e62babSAndreas Jaekel 		if (error) {
4144ab1323aSSimon Klinkert 			if (error == EINTR)
415d8e62babSAndreas Jaekel 				goto again;
416d8e62babSAndreas Jaekel 			/* convert checksum errors into IO errors */
417d8e62babSAndreas Jaekel 			if (error == ECKSUM)
418d8e62babSAndreas Jaekel 				error = SET_ERROR(EIO);
419d8e62babSAndreas Jaekel 			break;
420d8e62babSAndreas Jaekel 		}
421d8e62babSAndreas Jaekel 
422d8e62babSAndreas Jaekel 		n -= nbytes;
423d8e62babSAndreas Jaekel 	}
424d8e62babSAndreas Jaekel 
425d8e62babSAndreas Jaekel 	if (error)
426d8e62babSAndreas Jaekel 		return error;
427d8e62babSAndreas Jaekel 	return len - uio.uio_resid;
428d8e62babSAndreas Jaekel }
429d8e62babSAndreas Jaekel 
430d8e62babSAndreas Jaekel static void
zev_l0_sig(uint8_t * sig,char * buf)431d8e62babSAndreas Jaekel zev_l0_sig(uint8_t *sig, char *buf)
432d8e62babSAndreas Jaekel {
433d8e62babSAndreas Jaekel 	SHA1_CTX	ctx;
434d8e62babSAndreas Jaekel 
435d8e62babSAndreas Jaekel 	SHA1Init(&ctx);
436d8e62babSAndreas Jaekel 	SHA1Update(&ctx, buf, ZEV_L0_SIZE);
437d8e62babSAndreas Jaekel 	SHA1Final(sig, &ctx);
438d8e62babSAndreas Jaekel 	return;
439d8e62babSAndreas Jaekel }
440d8e62babSAndreas Jaekel 
441d8e62babSAndreas Jaekel static void
zev_l0_blocksig(uint8_t * blk_sig,uint8_t * l0_sig,uint8_t block_no)442d8e62babSAndreas Jaekel zev_l0_blocksig(uint8_t *blk_sig, uint8_t *l0_sig, uint8_t block_no)
443d8e62babSAndreas Jaekel {
444d8e62babSAndreas Jaekel 	SHA1_CTX	ctx;
445d8e62babSAndreas Jaekel 
446d8e62babSAndreas Jaekel 	SHA1Init(&ctx);
447d8e62babSAndreas Jaekel 	SHA1Update(&ctx, l0_sig, SHA1_DIGEST_LENGTH);
448d8e62babSAndreas Jaekel 	SHA1Update(&ctx, &block_no, sizeof(block_no));
449d8e62babSAndreas Jaekel 	SHA1Final(blk_sig, &ctx);
450d8e62babSAndreas Jaekel 	return;
451d8e62babSAndreas Jaekel }
452d8e62babSAndreas Jaekel 
453d8e62babSAndreas Jaekel static void
zev_l1_add(uint8_t * sig_l1,uint8_t * sig_l0)454d8e62babSAndreas Jaekel zev_l1_add(uint8_t *sig_l1, uint8_t *sig_l0)
455d8e62babSAndreas Jaekel {
456d8e62babSAndreas Jaekel 	int	i;
457d8e62babSAndreas Jaekel 	int	s;
458d8e62babSAndreas Jaekel 	int	carry = 0;
459d8e62babSAndreas Jaekel 
460d8e62babSAndreas Jaekel 	for (i = SHA1_DIGEST_LENGTH - 1; i >= 0; --i) {
461d8e62babSAndreas Jaekel 		s = sig_l1[i] + sig_l0[i] + carry;
462d8e62babSAndreas Jaekel 		carry = s > 255 ? 1 : 0;
463d8e62babSAndreas Jaekel 		sig_l1[i] = s & 0xff;
464d8e62babSAndreas Jaekel 	}
465d8e62babSAndreas Jaekel }
466d8e62babSAndreas Jaekel 
4676b4c2eb9SAndreas Jaekel static int
zev_get_result_buffer(zev_sig_t ** buffer,uint64_t * buffer_len,uint64_t max_buffer_len,znode_t * zp,uint64_t off,uint64_t len,zev_chksum_mode_t mode)468d8e62babSAndreas Jaekel zev_get_result_buffer(zev_sig_t **buffer,
469d8e62babSAndreas Jaekel                       uint64_t *buffer_len,
4706b4c2eb9SAndreas Jaekel                       uint64_t max_buffer_len,
471d8e62babSAndreas Jaekel                       znode_t *zp,
472d8e62babSAndreas Jaekel                       uint64_t off,
473d8e62babSAndreas Jaekel                       uint64_t len,
474d8e62babSAndreas Jaekel                       zev_chksum_mode_t mode)
475d8e62babSAndreas Jaekel {
476d8e62babSAndreas Jaekel 	uint64_t	blk_start;
477d8e62babSAndreas Jaekel 	uint64_t	blk_end;
478d8e62babSAndreas Jaekel 	uint64_t	l0_blocks;
479d8e62babSAndreas Jaekel 	uint64_t	l1_blocks;
480d8e62babSAndreas Jaekel 	uint64_t	sigs;
481d8e62babSAndreas Jaekel 	int buflen;
482d8e62babSAndreas Jaekel 
483d8e62babSAndreas Jaekel 	/* calculate result set size: how many checksums will we provide? */
484d8e62babSAndreas Jaekel 
485d8e62babSAndreas Jaekel 	ASSERT(len > 0 || (mode == zev_truncate && len == 0));
486d8e62babSAndreas Jaekel 
487d8e62babSAndreas Jaekel 	if (len == 0) {
488d8e62babSAndreas Jaekel 		/* truncate */
489d8e62babSAndreas Jaekel 		l0_blocks = ((off % ZEV_L0_SIZE) == 0) ? 0 : 1;
490d8e62babSAndreas Jaekel 		l1_blocks = ((off % ZEV_L1_SIZE) == 0) ? 0 : 1;
491d8e62babSAndreas Jaekel 	} else {
492d8e62babSAndreas Jaekel 		/* how many lv1 checksums do we update? */
493d8e62babSAndreas Jaekel 		blk_start = off / ZEV_L1_SIZE;
494d8e62babSAndreas Jaekel 		blk_end = (off + len - 1) / ZEV_L1_SIZE;
495d8e62babSAndreas Jaekel 		l1_blocks = blk_end - blk_start + 1;
496d8e62babSAndreas Jaekel 		/* how many lv0 checksums do we update? */
497d8e62babSAndreas Jaekel 		blk_start = off / ZEV_L0_SIZE;
498d8e62babSAndreas Jaekel 		blk_end = (off + len - 1) / ZEV_L0_SIZE;
499d8e62babSAndreas Jaekel 		l0_blocks = blk_end - blk_start + 1;
500d8e62babSAndreas Jaekel 	}
501d8e62babSAndreas Jaekel 
502d8e62babSAndreas Jaekel 	sigs = l1_blocks + l0_blocks;
503d8e62babSAndreas Jaekel 	if (sigs == 0) {
504d8e62babSAndreas Jaekel 		*buffer = NULL;
505d8e62babSAndreas Jaekel 		*buffer_len = 0;
5066b4c2eb9SAndreas Jaekel 		return 0;
507d8e62babSAndreas Jaekel 	}
508d8e62babSAndreas Jaekel 
509d8e62babSAndreas Jaekel 	buflen = sigs * sizeof(zev_sig_t);
5106b4c2eb9SAndreas Jaekel 	if (max_buffer_len && (buflen > max_buffer_len)) {
5116b4c2eb9SAndreas Jaekel 		*buffer = NULL;
5126b4c2eb9SAndreas Jaekel 		*buffer_len = 0;
5136b4c2eb9SAndreas Jaekel 		return ENOSPC;
5146b4c2eb9SAndreas Jaekel 	}
515d8e62babSAndreas Jaekel 	*buffer_len = buflen;
516d8e62babSAndreas Jaekel 	*buffer = zev_alloc(buflen);
5176b4c2eb9SAndreas Jaekel 	return 0;
518d8e62babSAndreas Jaekel }
519d8e62babSAndreas Jaekel 
520d8e62babSAndreas Jaekel static void
zev_append_sig(zev_sig_t * s,int level,uint64_t off,uint8_t * sig)521d8e62babSAndreas Jaekel zev_append_sig(zev_sig_t *s, int level, uint64_t off, uint8_t *sig)
522d8e62babSAndreas Jaekel {
523d8e62babSAndreas Jaekel 	s->level = level;
524d8e62babSAndreas Jaekel 	s->block_offset = off;
525d8e62babSAndreas Jaekel 	memcpy(s->value, sig, SHA1_DIGEST_LENGTH);
526d8e62babSAndreas Jaekel }
527d8e62babSAndreas Jaekel 
528d8e62babSAndreas Jaekel /*
529d8e62babSAndreas Jaekel  * Calculate all l0 and l1 checksums that are affected by the given range.
530d8e62babSAndreas Jaekel  *
531d8e62babSAndreas Jaekel  * This function assumes that the ranges it needs to read are already
532d8e62babSAndreas Jaekel  * range-locked.
533d8e62babSAndreas Jaekel  */
534d8e62babSAndreas Jaekel int
zev_get_checksums(zev_sig_t ** result,uint64_t * result_buf_len,uint64_t * signature_cnt,uint64_t max_result_len,znode_t * zp,uint64_t off,uint64_t len,zev_chksum_mode_t mode)535d8e62babSAndreas Jaekel zev_get_checksums(zev_sig_t **result,
536d8e62babSAndreas Jaekel                   uint64_t *result_buf_len,
537d8e62babSAndreas Jaekel                   uint64_t *signature_cnt,
5386b4c2eb9SAndreas Jaekel                   uint64_t max_result_len,
539d8e62babSAndreas Jaekel                   znode_t *zp,
540d8e62babSAndreas Jaekel                   uint64_t off,
541d8e62babSAndreas Jaekel                   uint64_t len,
542d8e62babSAndreas Jaekel                   zev_chksum_mode_t mode)
543d8e62babSAndreas Jaekel {
544d8e62babSAndreas Jaekel 	uint64_t	off_l1;
545d8e62babSAndreas Jaekel 	uint64_t	len_l1;
546d8e62babSAndreas Jaekel 	uint64_t	pos_l1;
547d8e62babSAndreas Jaekel 	uint64_t	pos_l0;
548d8e62babSAndreas Jaekel 	char		*buf;
549d8e62babSAndreas Jaekel 	int64_t		ret;
550d8e62babSAndreas Jaekel 	uint8_t		sig_l0[SHA1_DIGEST_LENGTH];
551d8e62babSAndreas Jaekel 	uint8_t		blk_sig_l0[SHA1_DIGEST_LENGTH];
552d8e62babSAndreas Jaekel 	uint8_t		sig_l1[SHA1_DIGEST_LENGTH];
553d8e62babSAndreas Jaekel 	uint8_t		l0_block_no;
554d8e62babSAndreas Jaekel 	zev_sig_t	*sig;
555d8e62babSAndreas Jaekel 	int		non_empty_l0_blocks;
556d8e62babSAndreas Jaekel 	zev_sig_cache_file_t *file;
557d8e62babSAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
558d8e62babSAndreas Jaekel 
559d8e62babSAndreas Jaekel 	/*
560d8e62babSAndreas Jaekel 	 * Note: for write events, the callback is called via
561d8e62babSAndreas Jaekel 	 *    zfs_write() -> zfs_log_write() -> zev_znode_write_cb()
562d8e62babSAndreas Jaekel 	 *
563d8e62babSAndreas Jaekel 	 * The transaction is not commited, yet.
564d8e62babSAndreas Jaekel 	 *
565d8e62babSAndreas Jaekel 	 * A write() syscall might be split into smaller chunks by zfs_write()
566d8e62babSAndreas Jaekel 	 *
567d8e62babSAndreas Jaekel 	 * zfs_write() has a range lock when this is called. (zfs_vnops.c:925)
568d8e62babSAndreas Jaekel 	 * In zev mode, the range lock will encompass all data we need
569d8e62babSAndreas Jaekel 	 * to calculate our checksums.
570d8e62babSAndreas Jaekel 	 *
571d8e62babSAndreas Jaekel 	 * The same is true for truncates with non-zero length. ("punch hole")
572d8e62babSAndreas Jaekel 	 */
573d8e62babSAndreas Jaekel 
574d8e62babSAndreas Jaekel 	ASSERT(len > 0 || (mode == zev_truncate && len == 0));
575d8e62babSAndreas Jaekel 	*signature_cnt = 0;
576d8e62babSAndreas Jaekel 
5771a818386SAndreas Jaekel 	/*
5781a818386SAndreas Jaekel 	 * Under certain circumstances we need the first l0 block's
5791a818386SAndreas Jaekel 	 * checksum, because we didn't store it in the database and
5801a818386SAndreas Jaekel 	 * can't easily get it from userspace.  Not for this exact point
5811a818386SAndreas Jaekel 	 * in time, anyway.  So we cheat a little.
5821a818386SAndreas Jaekel 	 */
5831a818386SAndreas Jaekel 	if (mode == zev_truncate && len == 0 && off == 4096) {
5841a818386SAndreas Jaekel 		/*
5851a818386SAndreas Jaekel 		 * Normally, we'd report no checkums:
5861a818386SAndreas Jaekel 		 *  - no l0 sum, because no remaining l0 block is changed
5871a818386SAndreas Jaekel 		 *  - no l1 sum, because the file is now too short for l1 sums
5881a818386SAndreas Jaekel 		 * Let's pretend we changed the first l0 block, then.
5891a818386SAndreas Jaekel 		 * Luckily the entire file is range locked during truncate().
5901a818386SAndreas Jaekel 		 */
5911a818386SAndreas Jaekel 		off = 0;
5921a818386SAndreas Jaekel 		len = 4096;
5931a818386SAndreas Jaekel 	}
5941a818386SAndreas Jaekel 
595d8e62babSAndreas Jaekel 	/* start of this megabyte */
596d8e62babSAndreas Jaekel 	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
597d8e62babSAndreas Jaekel 	/* full megabytes */
598d8e62babSAndreas Jaekel 	if (len == 0) {
599d8e62babSAndreas Jaekel 		/* truncate(): we'll look at the last lv1 block, only. */
600d8e62babSAndreas Jaekel 		len_l1 = ZEV_L1_SIZE;
601d8e62babSAndreas Jaekel 	} else {
602d8e62babSAndreas Jaekel 		len_l1 = len + (off - off_l1);
603d8e62babSAndreas Jaekel 		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
604d8e62babSAndreas Jaekel 	}
605d8e62babSAndreas Jaekel 
606d8e62babSAndreas Jaekel 	file = zev_chksum_cache_file_get_and_hold(zp);
607d8e62babSAndreas Jaekel 	zev_chksum_cache_invalidate(file, zp, mode, off, len);
608d8e62babSAndreas Jaekel 	buf = zev_alloc(ZEV_L0_SIZE);
609d8e62babSAndreas Jaekel 
6106b4c2eb9SAndreas Jaekel 	ret = zev_get_result_buffer(result, result_buf_len, max_result_len,
6116b4c2eb9SAndreas Jaekel 	                            zp, off, len, mode);
6126b4c2eb9SAndreas Jaekel 	if (ret) {
6136b4c2eb9SAndreas Jaekel 		zev_free(buf, ZEV_L0_SIZE);
6146b4c2eb9SAndreas Jaekel 		zev_chksum_cache_file_release(file);
6156b4c2eb9SAndreas Jaekel 		return ret;
6166b4c2eb9SAndreas Jaekel 	}
617d8e62babSAndreas Jaekel 	if (*result == NULL) {
618d8e62babSAndreas Jaekel 		/* we're done */
619d8e62babSAndreas Jaekel 		zev_free(buf, ZEV_L0_SIZE);
620d8e62babSAndreas Jaekel 		zev_chksum_cache_file_release(file);
621d8e62babSAndreas Jaekel 		return 0;
622d8e62babSAndreas Jaekel 	}
623d8e62babSAndreas Jaekel 	sig = *result;
624d8e62babSAndreas Jaekel 
625d8e62babSAndreas Jaekel 	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {
626d8e62babSAndreas Jaekel 
627d8e62babSAndreas Jaekel 		if (pos_l1 > zp->z_size) {
628d8e62babSAndreas Jaekel 			cmn_err(CE_WARN, "zev_get_checksums: off+len beyond "
629aafcdfa4SSimon Klinkert 			        "EOF. Unexpected behaviour; please fix! "
630aafcdfa4SSimon Klinkert 				"off=%" PRIu64 ", len=%" PRIu64 ", "
631aafcdfa4SSimon Klinkert 			        "dataset='%s', inode=%" PRIu64, off, len,
632aafcdfa4SSimon Klinkert 				zp->z_zfsvfs->z_os->
633c09b3f1aSAndreas Jaekel 			        os_dsl_dataset->ds_dir->dd_myname, zp->z_id);
634c09b3f1aSAndreas Jaekel 			zev_free(*result, *result_buf_len);
635c09b3f1aSAndreas Jaekel 			*result = NULL;
636*de5d7d2aSSimon Klinkert 			zev_free(buf, ZEV_L0_SIZE);
637*de5d7d2aSSimon Klinkert 			zev_chksum_cache_file_release(file);
638*de5d7d2aSSimon Klinkert 			return EIO;
639d8e62babSAndreas Jaekel 		}
640d8e62babSAndreas Jaekel 
641d8e62babSAndreas Jaekel 		/*
642d8e62babSAndreas Jaekel 		 * Since we have a reference to 'file' 'cs' can't be expired.
643d8e62babSAndreas Jaekel 		 * Since our ranges are range locked, other threads won't
644d8e62babSAndreas Jaekel 		 * touch our checksum entries. (not even read them)
645d8e62babSAndreas Jaekel 		 * Hence, we don't need to hold() or release() 'cs'.
646d8e62babSAndreas Jaekel 		 */
647d8e62babSAndreas Jaekel 		cs = zev_chksum_cache_get_lv1_entry(file, pos_l1);
648d8e62babSAndreas Jaekel 
649d8e62babSAndreas Jaekel 		l0_block_no = 0;
650d8e62babSAndreas Jaekel 		non_empty_l0_blocks = 0;
651d8e62babSAndreas Jaekel 		bzero(sig_l1, sizeof(sig_l1));
652d8e62babSAndreas Jaekel 		for (pos_l0 = pos_l1;
653d8e62babSAndreas Jaekel 		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
654d8e62babSAndreas Jaekel 		     pos_l0 += ZEV_L0_SIZE){
655d8e62babSAndreas Jaekel 
656d8e62babSAndreas Jaekel 			if (pos_l0 >= zp->z_size)
657d8e62babSAndreas Jaekel 				break;	/* EOF */
658d8e62babSAndreas Jaekel 
659d8e62babSAndreas Jaekel 			if (zev_chksum_cache_get(sig_l0, file,cs,pos_l0) != 0) {
660d8e62babSAndreas Jaekel 
661d8e62babSAndreas Jaekel 				/* signature is not cached, yet. */
662d8e62babSAndreas Jaekel 				ret = zev_safe_read(zp, buf,
663d8e62babSAndreas Jaekel 				                    pos_l0, ZEV_L0_SIZE);
664d8e62babSAndreas Jaekel 				if (ret < 0) {
665d8e62babSAndreas Jaekel 					zev_free(*result, *result_buf_len);
666d8e62babSAndreas Jaekel 					zev_free(buf, ZEV_L0_SIZE);
667d8e62babSAndreas Jaekel 					zev_chksum_cache_file_release(file);
668d8e62babSAndreas Jaekel 					return ret;
669d8e62babSAndreas Jaekel 				}
670d8e62babSAndreas Jaekel 				/* pad buffer with zeros if necessary */
671d8e62babSAndreas Jaekel 				if (ret < ZEV_L0_SIZE)
672d8e62babSAndreas Jaekel 					bzero(buf + ret, ZEV_L0_SIZE - ret);
673d8e62babSAndreas Jaekel 
674d8e62babSAndreas Jaekel 				/* calculate signature */
675d8e62babSAndreas Jaekel 				zev_l0_sig(sig_l0, buf);
676d8e62babSAndreas Jaekel 
677d8e62babSAndreas Jaekel 				zev_chksum_cache_put(sig_l0, file, cs, pos_l0);
678d8e62babSAndreas Jaekel 			}
679d8e62babSAndreas Jaekel 
680d8e62babSAndreas Jaekel 			if (!memcmp(sig_l0, all_zero_sig, SHA1_DIGEST_LENGTH)) {
681d8e62babSAndreas Jaekel 				/* all-zero l0 block.  omit signature. */
682d8e62babSAndreas Jaekel 				l0_block_no++;
683d8e62babSAndreas Jaekel 				continue;
684d8e62babSAndreas Jaekel 			}
685d8e62babSAndreas Jaekel 			non_empty_l0_blocks++;
686d8e62babSAndreas Jaekel 			zev_l0_blocksig(blk_sig_l0, sig_l0, l0_block_no);
687d8e62babSAndreas Jaekel 			zev_l1_add(sig_l1, blk_sig_l0);
688d8e62babSAndreas Jaekel 
689d8e62babSAndreas Jaekel 			if (((pos_l0 + ZEV_L0_SIZE - 1) >= off) &&
69094c406e7SAndreas Jaekel 			    (pos_l0 <= (off + len - 1))) {
691d8e62babSAndreas Jaekel 				zev_append_sig(sig++, 0, pos_l0, sig_l0);
692d8e62babSAndreas Jaekel 			}
693d8e62babSAndreas Jaekel 
694d8e62babSAndreas Jaekel 			l0_block_no++;
695d8e62babSAndreas Jaekel 		}
696d8e62babSAndreas Jaekel 
697d8e62babSAndreas Jaekel 		if (non_empty_l0_blocks && (zp->z_size > ZEV_L0_SIZE))
698d8e62babSAndreas Jaekel 			zev_append_sig(sig++, 1, pos_l1, sig_l1);
699d8e62babSAndreas Jaekel 	}
700d8e62babSAndreas Jaekel 
701d8e62babSAndreas Jaekel 	*signature_cnt = ((char *)sig - (char *)*result) / sizeof(zev_sig_t);
702d8e62babSAndreas Jaekel 
703d8e62babSAndreas Jaekel 	zev_free(buf, ZEV_L0_SIZE);
704d8e62babSAndreas Jaekel 	zev_chksum_cache_file_release(file);
705d8e62babSAndreas Jaekel 	return 0;
706d8e62babSAndreas Jaekel }
7076b4c2eb9SAndreas Jaekel 
7086b4c2eb9SAndreas Jaekel int
zev_ioc_get_signatures(intptr_t arg,int mode)7096b4c2eb9SAndreas Jaekel zev_ioc_get_signatures(intptr_t arg, int mode)
7106b4c2eb9SAndreas Jaekel {
7116b4c2eb9SAndreas Jaekel 	zev_ioctl_get_signatures_t gs;
7126b4c2eb9SAndreas Jaekel 	file_t *fp;
7136b4c2eb9SAndreas Jaekel 	int ret = 0;
7146b4c2eb9SAndreas Jaekel 	znode_t *zp;
7156b4c2eb9SAndreas Jaekel 	zev_sig_t *sig_buf = NULL;
7166b4c2eb9SAndreas Jaekel 	uint64_t sig_buf_len;
7176b4c2eb9SAndreas Jaekel 	uint64_t sig_cnt = 0;
7186b4c2eb9SAndreas Jaekel 	uint64_t sig_len;
7196b4c2eb9SAndreas Jaekel 	char *dst;
7206b4c2eb9SAndreas Jaekel 	int range_locked = 0;
7216b4c2eb9SAndreas Jaekel 	rl_t *rl;
7226b4c2eb9SAndreas Jaekel 	ssize_t	lock_off;
7236b4c2eb9SAndreas Jaekel 	ssize_t lock_len;
724f8d4f0f6SAndreas Jaekel 	struct zfsvfs *zfsvfs = NULL;
7256b4c2eb9SAndreas Jaekel 
7266b4c2eb9SAndreas Jaekel 	if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
7276b4c2eb9SAndreas Jaekel 		return EFAULT;
7286b4c2eb9SAndreas Jaekel 	fp = getf(gs.zev_fd);
7296b4c2eb9SAndreas Jaekel 	if (fp == NULL)
7306b4c2eb9SAndreas Jaekel 		return EBADF;
7316b4c2eb9SAndreas Jaekel 	if (fp->f_vnode->v_vfsp->vfs_fstype != zfsfstype) {
7326b4c2eb9SAndreas Jaekel 		ret = EINVAL;
7336b4c2eb9SAndreas Jaekel 		goto out;
7346b4c2eb9SAndreas Jaekel 	}
735f8d4f0f6SAndreas Jaekel 	zp = VTOZ(fp->f_vnode);
736f8d4f0f6SAndreas Jaekel 
737f8d4f0f6SAndreas Jaekel 	/* modified version of ZFS_ENTER() macro - we need to clean up fp */
738f8d4f0f6SAndreas Jaekel 	zfsvfs = zp->z_zfsvfs;
739f8d4f0f6SAndreas Jaekel 	rrm_enter_read(&zfsvfs->z_teardown_lock, FTAG);
740f8d4f0f6SAndreas Jaekel 	if (zp->z_zfsvfs->z_unmounted) {
741f8d4f0f6SAndreas Jaekel 		ret = EIO;
742f8d4f0f6SAndreas Jaekel 		goto out;
743f8d4f0f6SAndreas Jaekel 	}
744f8d4f0f6SAndreas Jaekel 	/* modified version of ZFS_VERIFY_ZP() macro */
745f8d4f0f6SAndreas Jaekel 	if (zp->z_sa_hdl == NULL) {
746f8d4f0f6SAndreas Jaekel 		ret = EIO;
747f8d4f0f6SAndreas Jaekel 		goto out;
748f8d4f0f6SAndreas Jaekel 	}
749f8d4f0f6SAndreas Jaekel 
7506b4c2eb9SAndreas Jaekel 	if (fp->f_vnode->v_type != VREG) {
7516b4c2eb9SAndreas Jaekel 		ret = EINVAL;
7526b4c2eb9SAndreas Jaekel 		goto out;
7536b4c2eb9SAndreas Jaekel 	}
7546b4c2eb9SAndreas Jaekel 	if (gs.zev_offset >= zp->z_size) {
7556b4c2eb9SAndreas Jaekel 		ret = EINVAL;
7566b4c2eb9SAndreas Jaekel 		goto out;
7576b4c2eb9SAndreas Jaekel 	}
7586b4c2eb9SAndreas Jaekel 
7596b4c2eb9SAndreas Jaekel 	/* range lock data */
7606b4c2eb9SAndreas Jaekel 	lock_off = P2ALIGN(gs.zev_offset, ZEV_L1_SIZE);
7616b4c2eb9SAndreas Jaekel 	lock_len = gs.zev_len + (gs.zev_offset - lock_off);
7626b4c2eb9SAndreas Jaekel 	lock_len = P2ROUNDUP(lock_len, ZEV_L1_SIZE);
7636b4c2eb9SAndreas Jaekel 	rl = zfs_range_lock(zp, lock_off, lock_len, RL_READER);
7646b4c2eb9SAndreas Jaekel 	range_locked = 1;
7656b4c2eb9SAndreas Jaekel 
7666b4c2eb9SAndreas Jaekel 	/* get checksums */
7676b4c2eb9SAndreas Jaekel 	ret = zev_get_checksums(&sig_buf, &sig_buf_len, &sig_cnt,
7686b4c2eb9SAndreas Jaekel 	                        gs.zev_bufsize,
7696b4c2eb9SAndreas Jaekel 	                        zp, gs.zev_offset, gs.zev_len, zev_write);
7706b4c2eb9SAndreas Jaekel 	if (ret)
7716b4c2eb9SAndreas Jaekel 		goto out;
7726b4c2eb9SAndreas Jaekel 
7736b4c2eb9SAndreas Jaekel 	/* copy to userland */
7746b4c2eb9SAndreas Jaekel 	sig_len = sig_cnt * sizeof(zev_sig_t);
7756b4c2eb9SAndreas Jaekel 	gs.zev_signature_cnt = sig_cnt;
7766b4c2eb9SAndreas Jaekel 	if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) {
7776b4c2eb9SAndreas Jaekel 		ret = EFAULT;
7786b4c2eb9SAndreas Jaekel 		goto out;
7796b4c2eb9SAndreas Jaekel 	}
7806b4c2eb9SAndreas Jaekel 	if (sig_cnt && sig_buf) {
7816b4c2eb9SAndreas Jaekel 		dst = (char *)arg + sizeof(gs);
7826b4c2eb9SAndreas Jaekel 		if (ddi_copyout(sig_buf, (void *)dst, sig_len, mode) != 0) {
7836b4c2eb9SAndreas Jaekel 			ret = EFAULT;
7846b4c2eb9SAndreas Jaekel 			goto out;
7856b4c2eb9SAndreas Jaekel 		}
7866b4c2eb9SAndreas Jaekel 	}
7876b4c2eb9SAndreas Jaekel out:
7886b4c2eb9SAndreas Jaekel 	if (sig_buf)
7896b4c2eb9SAndreas Jaekel 		zev_free(sig_buf, sig_buf_len);
7906b4c2eb9SAndreas Jaekel 	if (range_locked)
7916b4c2eb9SAndreas Jaekel 		zfs_range_unlock(rl);
792f8d4f0f6SAndreas Jaekel 	if (zfsvfs)
793f8d4f0f6SAndreas Jaekel 		ZFS_EXIT(zfsvfs);
7946b4c2eb9SAndreas Jaekel 	releasef(gs.zev_fd);
7956b4c2eb9SAndreas Jaekel 	return ret;
7966b4c2eb9SAndreas Jaekel }
7976b4c2eb9SAndreas Jaekel 
798d27baf23SAndreas Jaekel void
zev_symlink_checksum(zev_znode_symlink_t * rec,char * link)799d27baf23SAndreas Jaekel zev_symlink_checksum(zev_znode_symlink_t *rec, char *link)
800d27baf23SAndreas Jaekel {
801d27baf23SAndreas Jaekel 	char buf[ZEV_L0_SIZE];
802d27baf23SAndreas Jaekel 
803d27baf23SAndreas Jaekel 	memset(buf, 0, sizeof(buf));
804d27baf23SAndreas Jaekel 	strcpy(buf, link);
805d27baf23SAndreas Jaekel 	zev_l0_sig(rec->signature.value, buf);
806d27baf23SAndreas Jaekel 	rec->signature.level = 0;
807d27baf23SAndreas Jaekel 	rec->signature.block_offset = 0;
808d27baf23SAndreas Jaekel }
809d27baf23SAndreas Jaekel 
810d27baf23SAndreas Jaekel 
811d27baf23SAndreas Jaekel void
zev_create_checksum(zev_znode_create_t * rec,znode_t * zp)812d27baf23SAndreas Jaekel zev_create_checksum(zev_znode_create_t *rec, znode_t *zp)
813d27baf23SAndreas Jaekel {
814d27baf23SAndreas Jaekel 	char buf[ZEV_L0_SIZE];
815d27baf23SAndreas Jaekel 	vnode_t *vp;
816d27baf23SAndreas Jaekel 	uint64_t rdev;
817d27baf23SAndreas Jaekel 
818d27baf23SAndreas Jaekel 	vp = ZTOV(zp);
819d27baf23SAndreas Jaekel 	if (vp->v_type == VBLK || vp->v_type == VCHR) {
820d27baf23SAndreas Jaekel 		sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zp->z_zfsvfs),
821d27baf23SAndreas Jaekel 		          &rdev, sizeof(rdev));
822d27baf23SAndreas Jaekel 		memset(buf, 0, sizeof(buf));
823d27baf23SAndreas Jaekel 		snprintf(buf, sizeof(buf), "%c%d,%d",
824d27baf23SAndreas Jaekel 		         vp->v_type == VBLK ? 'b' : 'c',
825d27baf23SAndreas Jaekel 		         getmajor(rdev),
826d27baf23SAndreas Jaekel 		         getminor(rdev));
827d27baf23SAndreas Jaekel 		zev_l0_sig(rec->signature.value, buf);
828d27baf23SAndreas Jaekel 	} else {
829d27baf23SAndreas Jaekel 		memset(rec->signature.value, 0, sizeof(rec->signature.value));
830d27baf23SAndreas Jaekel 	}
831d27baf23SAndreas Jaekel 	rec->signature.level = 0;
832d27baf23SAndreas Jaekel 	rec->signature.block_offset = 0;
833d27baf23SAndreas Jaekel }
834d27baf23SAndreas Jaekel 
835