xref: /titanic_44/usr/src/uts/common/fs/zev/zev_checksums.c (revision 6d08434eebcfac45213e38af01c64cf2559760d3)
1205ed6bfSAndreas Jaekel #include <sys/zfs_events.h>
2205ed6bfSAndreas Jaekel #include <sys/zev_checksums.h>
3205ed6bfSAndreas Jaekel #include <sys/fs/zev.h>
4205ed6bfSAndreas Jaekel #include <sys/zfs_znode.h>
5205ed6bfSAndreas Jaekel #include <sys/sha1.h>
6205ed6bfSAndreas Jaekel #include <sys/avl.h>
7205ed6bfSAndreas Jaekel #include <sys/sysmacros.h>
8205ed6bfSAndreas Jaekel #include <sys/fs/zev.h>
942110aacSAndreas Jaekel #include <sys/zfs_rlock.h>
1010d7bc57SAndreas Jaekel #include <sys/list.h>
11205ed6bfSAndreas Jaekel 
12205ed6bfSAndreas Jaekel typedef struct zev_sig_cache_chksums_t {
13205ed6bfSAndreas Jaekel 	/* begin of key */
14205ed6bfSAndreas Jaekel 	uint64_t			offset_l1;
15205ed6bfSAndreas Jaekel 	/* end of key */
16205ed6bfSAndreas Jaekel 	avl_node_t			avl_node;
17205ed6bfSAndreas Jaekel 	uint8_t		sigs[ZEV_L1_SIZE/ZEV_L0_SIZE][SHA1_DIGEST_LENGTH];
18205ed6bfSAndreas Jaekel } zev_sig_cache_chksums_t;
19205ed6bfSAndreas Jaekel 
20205ed6bfSAndreas Jaekel typedef struct zev_sig_cache_file_t {
21205ed6bfSAndreas Jaekel 	/* begin of key */
22205ed6bfSAndreas Jaekel 	uint64_t			guid;
23205ed6bfSAndreas Jaekel 	uint64_t			ino;
24205ed6bfSAndreas Jaekel 	uint64_t			gen;
25205ed6bfSAndreas Jaekel 	/* end of key */
26205ed6bfSAndreas Jaekel 	uint32_t			refcnt;
2710d7bc57SAndreas Jaekel 	list_node_t			lru_node;
28205ed6bfSAndreas Jaekel 	avl_node_t			avl_node;
29205ed6bfSAndreas Jaekel 	avl_tree_t			chksums;
30205ed6bfSAndreas Jaekel } zev_sig_cache_file_t;
31205ed6bfSAndreas Jaekel 
32205ed6bfSAndreas Jaekel typedef struct zev_sig_cache_t {
33205ed6bfSAndreas Jaekel 	kmutex_t			mutex;
34205ed6bfSAndreas Jaekel 	uint64_t			cache_size;
35205ed6bfSAndreas Jaekel 	uint64_t			max_cache_size;
36205ed6bfSAndreas Jaekel 	uint64_t			hits;
37205ed6bfSAndreas Jaekel 	uint64_t			misses;
3810d7bc57SAndreas Jaekel 	list_t				lru;
39205ed6bfSAndreas Jaekel 	avl_tree_t			files;
40205ed6bfSAndreas Jaekel } zev_sig_cache_t;
41205ed6bfSAndreas Jaekel 
42205ed6bfSAndreas Jaekel extern offset_t zfs_read_chunk_size;	/* tuneable from zfs_vnops.c */
43205ed6bfSAndreas Jaekel 
44205ed6bfSAndreas Jaekel static uint8_t all_zero_sig[SHA1_DIGEST_LENGTH] = {
45205ed6bfSAndreas Jaekel 	0x1c, 0xea, 0xf7, 0x3d, 0xf4, 0x0e, 0x53, 0x1d, 0xf3, 0xbf,
46205ed6bfSAndreas Jaekel 	0xb2, 0x6b, 0x4f, 0xb7, 0xcd, 0x95, 0xfb, 0x7b, 0xff, 0x1d
47205ed6bfSAndreas Jaekel };
48205ed6bfSAndreas Jaekel 
49205ed6bfSAndreas Jaekel static uint8_t unknown_sig[SHA1_DIGEST_LENGTH] = {
50205ed6bfSAndreas Jaekel 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
51205ed6bfSAndreas Jaekel 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
52205ed6bfSAndreas Jaekel };
53205ed6bfSAndreas Jaekel 
54205ed6bfSAndreas Jaekel static zev_sig_cache_t	zev_sig_cache;
55205ed6bfSAndreas Jaekel 
56205ed6bfSAndreas Jaekel static int
zev_cache_file_cmp(const void * entry_a,const void * entry_b)57205ed6bfSAndreas Jaekel zev_cache_file_cmp(const void *entry_a, const void *entry_b)
58205ed6bfSAndreas Jaekel {
59205ed6bfSAndreas Jaekel 	const zev_sig_cache_file_t *a = entry_a;
60205ed6bfSAndreas Jaekel 	const zev_sig_cache_file_t *b = entry_b;
61205ed6bfSAndreas Jaekel 
62205ed6bfSAndreas Jaekel 	if (a->guid < b->guid)
63205ed6bfSAndreas Jaekel 		return -1;
64205ed6bfSAndreas Jaekel 	if (a->guid > b->guid)
65205ed6bfSAndreas Jaekel 		return 1;
66205ed6bfSAndreas Jaekel 	if (a->ino < b->ino)
67205ed6bfSAndreas Jaekel 		return -1;
68205ed6bfSAndreas Jaekel 	if (a->ino > b->ino)
69205ed6bfSAndreas Jaekel 		return 1;
70205ed6bfSAndreas Jaekel 	if (a->gen < b->gen)
71205ed6bfSAndreas Jaekel 		return -1;
72205ed6bfSAndreas Jaekel 	if (a->gen > b->gen)
73205ed6bfSAndreas Jaekel 		return 1;
74205ed6bfSAndreas Jaekel 	return 0;
75205ed6bfSAndreas Jaekel }
76205ed6bfSAndreas Jaekel 
77205ed6bfSAndreas Jaekel static int
zev_chksum_cache_cmp(const void * entry_a,const void * entry_b)78205ed6bfSAndreas Jaekel zev_chksum_cache_cmp(const void *entry_a, const void *entry_b)
79205ed6bfSAndreas Jaekel {
80205ed6bfSAndreas Jaekel 	const zev_sig_cache_chksums_t *a = entry_a;
81205ed6bfSAndreas Jaekel 	const zev_sig_cache_chksums_t *b = entry_b;
82205ed6bfSAndreas Jaekel 
83205ed6bfSAndreas Jaekel 	if (a->offset_l1 < b->offset_l1)
84205ed6bfSAndreas Jaekel 		return -1;
85205ed6bfSAndreas Jaekel 	if (a->offset_l1 > b->offset_l1)
86205ed6bfSAndreas Jaekel 		return 1;
87205ed6bfSAndreas Jaekel 	return 0;
88205ed6bfSAndreas Jaekel }
89205ed6bfSAndreas Jaekel 
90205ed6bfSAndreas Jaekel /* must be called with zev_sig_cache.mutex held */
91205ed6bfSAndreas Jaekel static void
zev_chksum_cache_file_free(zev_sig_cache_file_t * file)92205ed6bfSAndreas Jaekel zev_chksum_cache_file_free(zev_sig_cache_file_t *file)
93205ed6bfSAndreas Jaekel {
94205ed6bfSAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
95205ed6bfSAndreas Jaekel 	void *c = NULL; /* cookie */
96205ed6bfSAndreas Jaekel 
97205ed6bfSAndreas Jaekel 	/* remove from lru list */
9810d7bc57SAndreas Jaekel 	list_remove(&zev_sig_cache.lru, file);
99205ed6bfSAndreas Jaekel 	/* free resources */
100205ed6bfSAndreas Jaekel 	avl_remove(&zev_sig_cache.files, file);
101205ed6bfSAndreas Jaekel 	while ((cs = avl_destroy_nodes(&file->chksums, &c)) != NULL) {
102205ed6bfSAndreas Jaekel 		zev_sig_cache.cache_size -= sizeof(*cs);
103205ed6bfSAndreas Jaekel 		zev_free(cs, sizeof(*cs));
104205ed6bfSAndreas Jaekel 	}
105205ed6bfSAndreas Jaekel 	avl_destroy(&file->chksums);
106205ed6bfSAndreas Jaekel 	zev_free(file, sizeof(*file));
107205ed6bfSAndreas Jaekel 	zev_sig_cache.cache_size -= sizeof(*file);
108205ed6bfSAndreas Jaekel }
109205ed6bfSAndreas Jaekel 
110205ed6bfSAndreas Jaekel void
zev_chksum_init(void)111205ed6bfSAndreas Jaekel zev_chksum_init(void)
112205ed6bfSAndreas Jaekel {
113205ed6bfSAndreas Jaekel 	memset(&zev_sig_cache, 0, sizeof(zev_sig_cache));
114205ed6bfSAndreas Jaekel 	mutex_init(&zev_sig_cache.mutex, NULL, MUTEX_DRIVER, NULL);
115205ed6bfSAndreas Jaekel 	avl_create(&zev_sig_cache.files, zev_cache_file_cmp,
116205ed6bfSAndreas Jaekel 	           sizeof(zev_sig_cache_file_t),
117205ed6bfSAndreas Jaekel 	           offsetof(zev_sig_cache_file_t, avl_node));
11810d7bc57SAndreas Jaekel 	list_create(&zev_sig_cache.lru,
11910d7bc57SAndreas Jaekel 	            sizeof(zev_sig_cache_file_t),
12010d7bc57SAndreas Jaekel 	            offsetof(zev_sig_cache_file_t, lru_node));
121205ed6bfSAndreas Jaekel 	zev_sig_cache.max_cache_size = ZEV_CHKSUM_DEFAULT_CACHE_SIZE;
122205ed6bfSAndreas Jaekel }
123205ed6bfSAndreas Jaekel 
124205ed6bfSAndreas Jaekel void
zev_chksum_fini(void)125205ed6bfSAndreas Jaekel zev_chksum_fini(void)
126205ed6bfSAndreas Jaekel {
127205ed6bfSAndreas Jaekel 	zev_sig_cache_file_t *file;
128205ed6bfSAndreas Jaekel 
129205ed6bfSAndreas Jaekel 	mutex_destroy(&zev_sig_cache.mutex);
130205ed6bfSAndreas Jaekel 	while ((file = avl_first(&zev_sig_cache.files)) != NULL)
131205ed6bfSAndreas Jaekel 		zev_chksum_cache_file_free(file);
13210d7bc57SAndreas Jaekel 	list_destroy(&zev_sig_cache.lru);
133205ed6bfSAndreas Jaekel 	avl_destroy(&zev_sig_cache.files);
134205ed6bfSAndreas Jaekel }
135205ed6bfSAndreas Jaekel 
136205ed6bfSAndreas Jaekel static zev_sig_cache_file_t *
zev_chksum_cache_file_get_and_hold(znode_t * zp)137205ed6bfSAndreas Jaekel zev_chksum_cache_file_get_and_hold(znode_t *zp)
138205ed6bfSAndreas Jaekel {
139205ed6bfSAndreas Jaekel 	zev_sig_cache_file_t find_file;
140205ed6bfSAndreas Jaekel 	zev_sig_cache_file_t *file;
141205ed6bfSAndreas Jaekel 	avl_index_t where;
142205ed6bfSAndreas Jaekel 
1438948de2fSSimon Klinkert 	find_file.guid =
1448948de2fSSimon Klinkert 		dsl_dataset_phys(zp->z_zfsvfs->z_os->os_dsl_dataset)->ds_guid;
145205ed6bfSAndreas Jaekel 	find_file.ino = zp->z_id;
146205ed6bfSAndreas Jaekel 	find_file.gen = zp->z_gen;
147205ed6bfSAndreas Jaekel 
148205ed6bfSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
149205ed6bfSAndreas Jaekel 	file = avl_find(&zev_sig_cache.files, &find_file, &where);
150205ed6bfSAndreas Jaekel 	if (!file) {
151205ed6bfSAndreas Jaekel 		file = zev_alloc(sizeof(*file));
152205ed6bfSAndreas Jaekel 		file->guid =
1538948de2fSSimon Klinkert 		    dsl_dataset_phys(zp->z_zfsvfs->z_os->os_dsl_dataset)->ds_guid;
154205ed6bfSAndreas Jaekel 		file->ino = zp->z_id;
155205ed6bfSAndreas Jaekel 		file->gen = zp->z_gen;
156205ed6bfSAndreas Jaekel 		file->refcnt = 0;
157205ed6bfSAndreas Jaekel 		avl_create(&file->chksums, zev_chksum_cache_cmp,
158205ed6bfSAndreas Jaekel 		           sizeof(zev_sig_cache_chksums_t),
159205ed6bfSAndreas Jaekel 		           offsetof(zev_sig_cache_chksums_t, avl_node));
16010d7bc57SAndreas Jaekel 		list_insert_head(&zev_sig_cache.lru, file);
161205ed6bfSAndreas Jaekel 		avl_insert(&zev_sig_cache.files, file, where);
162205ed6bfSAndreas Jaekel 		zev_sig_cache.cache_size += sizeof(*file);
163205ed6bfSAndreas Jaekel 	}
164205ed6bfSAndreas Jaekel 	file->refcnt++;
165205ed6bfSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
166205ed6bfSAndreas Jaekel 	return file;
167205ed6bfSAndreas Jaekel }
168205ed6bfSAndreas Jaekel 
169205ed6bfSAndreas Jaekel static void
zev_chksum_cache_file_release(zev_sig_cache_file_t * file)170205ed6bfSAndreas Jaekel zev_chksum_cache_file_release(zev_sig_cache_file_t *file)
171205ed6bfSAndreas Jaekel {
172205ed6bfSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
173205ed6bfSAndreas Jaekel 
174205ed6bfSAndreas Jaekel 	/* We don't invalidate/free/destroy *file. Cache expiry does that */
175205ed6bfSAndreas Jaekel 	file->refcnt--;
176205ed6bfSAndreas Jaekel 
177205ed6bfSAndreas Jaekel 	/* Move file to front of lru list */
17810d7bc57SAndreas Jaekel 	list_remove(&zev_sig_cache.lru, file);
17910d7bc57SAndreas Jaekel 	list_insert_head(&zev_sig_cache.lru, file);
180205ed6bfSAndreas Jaekel 
181205ed6bfSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
182205ed6bfSAndreas Jaekel }
183205ed6bfSAndreas Jaekel 
184205ed6bfSAndreas Jaekel static  zev_sig_cache_chksums_t *
zev_chksum_cache_get_lv1_entry(zev_sig_cache_file_t * file,uint64_t off_l1)185205ed6bfSAndreas Jaekel zev_chksum_cache_get_lv1_entry(zev_sig_cache_file_t *file, uint64_t off_l1)
186205ed6bfSAndreas Jaekel {
187205ed6bfSAndreas Jaekel 	zev_sig_cache_chksums_t find_chksum;
188205ed6bfSAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
189205ed6bfSAndreas Jaekel 	avl_index_t where;
190205ed6bfSAndreas Jaekel 
191cb0aa7ecSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
192cb0aa7ecSAndreas Jaekel 
193205ed6bfSAndreas Jaekel 	find_chksum.offset_l1 = off_l1;
194205ed6bfSAndreas Jaekel 	cs = avl_find(&file->chksums, &find_chksum, &where);
195205ed6bfSAndreas Jaekel 	if (!cs) {
196205ed6bfSAndreas Jaekel 		cs = zev_zalloc(sizeof(*cs));
197205ed6bfSAndreas Jaekel 		cs->offset_l1 = off_l1;
198205ed6bfSAndreas Jaekel 		avl_insert(&file->chksums, cs, where);
199205ed6bfSAndreas Jaekel 		zev_sig_cache.cache_size += sizeof(*cs);
200205ed6bfSAndreas Jaekel 	}
201cb0aa7ecSAndreas Jaekel 
202cb0aa7ecSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
203cb0aa7ecSAndreas Jaekel 
204205ed6bfSAndreas Jaekel 	return cs;
205205ed6bfSAndreas Jaekel }
206205ed6bfSAndreas Jaekel 
207205ed6bfSAndreas Jaekel void
zev_chksum_stats(uint64_t * c_size,uint64_t * c_hits,uint64_t * c_misses)208205ed6bfSAndreas Jaekel zev_chksum_stats(uint64_t *c_size, uint64_t *c_hits, uint64_t *c_misses)
209205ed6bfSAndreas Jaekel {
210205ed6bfSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
211205ed6bfSAndreas Jaekel 	*c_size = zev_sig_cache.cache_size;
212205ed6bfSAndreas Jaekel 	*c_hits = zev_sig_cache.hits;
213205ed6bfSAndreas Jaekel 	*c_misses = zev_sig_cache.misses;
214205ed6bfSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
215205ed6bfSAndreas Jaekel }
216205ed6bfSAndreas Jaekel 
217205ed6bfSAndreas Jaekel static void
zev_chksum_cache_invalidate(zev_sig_cache_file_t * file,znode_t * zp,zev_chksum_mode_t mode,uint64_t off,uint64_t len)218205ed6bfSAndreas Jaekel zev_chksum_cache_invalidate(zev_sig_cache_file_t *file,
219205ed6bfSAndreas Jaekel                             znode_t *zp,
220205ed6bfSAndreas Jaekel                             zev_chksum_mode_t mode,
221205ed6bfSAndreas Jaekel                             uint64_t off,
222205ed6bfSAndreas Jaekel                             uint64_t len)
223205ed6bfSAndreas Jaekel {
224205ed6bfSAndreas Jaekel 	zev_sig_cache_chksums_t find_chksum;
225205ed6bfSAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
226205ed6bfSAndreas Jaekel 	int idx;
227205ed6bfSAndreas Jaekel 	uint64_t off_l1;
228205ed6bfSAndreas Jaekel 	uint64_t len_l1;
229205ed6bfSAndreas Jaekel 	uint64_t pos_l0;
230205ed6bfSAndreas Jaekel 	uint64_t pos_l1;
231205ed6bfSAndreas Jaekel 
232205ed6bfSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
233205ed6bfSAndreas Jaekel 
234205ed6bfSAndreas Jaekel 	/* start of this megabyte */
235205ed6bfSAndreas Jaekel 	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
236205ed6bfSAndreas Jaekel 
237205ed6bfSAndreas Jaekel 	if (len == 0) {
238205ed6bfSAndreas Jaekel 		/* truncate() to EOF */
239205ed6bfSAndreas Jaekel 		len_l1 = ZEV_L1_SIZE;
240205ed6bfSAndreas Jaekel 	} else {
241205ed6bfSAndreas Jaekel 		/* full megabytes */
242205ed6bfSAndreas Jaekel 		len_l1 = len + (off - off_l1);
243205ed6bfSAndreas Jaekel 		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
244205ed6bfSAndreas Jaekel 	}
245205ed6bfSAndreas Jaekel 
246205ed6bfSAndreas Jaekel 	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {
247205ed6bfSAndreas Jaekel 
248205ed6bfSAndreas Jaekel 		find_chksum.offset_l1 = pos_l1;
249205ed6bfSAndreas Jaekel 		cs = avl_find(&file->chksums, &find_chksum, NULL);
250205ed6bfSAndreas Jaekel 		if (!cs)
251205ed6bfSAndreas Jaekel 			continue;
252205ed6bfSAndreas Jaekel 
253205ed6bfSAndreas Jaekel 		for (pos_l0 = MAX(pos_l1, P2ALIGN(off, ZEV_L0_SIZE));
254205ed6bfSAndreas Jaekel 		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
255205ed6bfSAndreas Jaekel 		     pos_l0 += ZEV_L0_SIZE){
256205ed6bfSAndreas Jaekel 
25767cfdb11SAndreas Jaekel 			if ((len > 0) && (pos_l0 > (off + len - 1)))
258205ed6bfSAndreas Jaekel 				break;
259205ed6bfSAndreas Jaekel 
260205ed6bfSAndreas Jaekel 			idx = (pos_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
261205ed6bfSAndreas Jaekel 			memcpy(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH);
262205ed6bfSAndreas Jaekel 		}
263205ed6bfSAndreas Jaekel 	}
264205ed6bfSAndreas Jaekel 
265205ed6bfSAndreas Jaekel 	if (len == 0) {
266205ed6bfSAndreas Jaekel 		/* truncate() to EOF -> invalidate all l1 sigs beyond EOF */
267205ed6bfSAndreas Jaekel 		while ((cs = avl_last(&file->chksums)) != NULL) {
268205ed6bfSAndreas Jaekel 			if (cs->offset_l1 < zp->z_size)
269205ed6bfSAndreas Jaekel 				break;
270205ed6bfSAndreas Jaekel 			avl_remove(&file->chksums, cs);
271205ed6bfSAndreas Jaekel 			zev_sig_cache.cache_size -= sizeof(*cs);
272205ed6bfSAndreas Jaekel 			zev_free(cs, sizeof(*cs));
273205ed6bfSAndreas Jaekel 		}
274205ed6bfSAndreas Jaekel 	}
275205ed6bfSAndreas Jaekel 
276205ed6bfSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
277205ed6bfSAndreas Jaekel }
278205ed6bfSAndreas Jaekel 
279205ed6bfSAndreas Jaekel static int
zev_chksum_cache_get(uint8_t * dst,zev_sig_cache_file_t * file,zev_sig_cache_chksums_t * cs,uint64_t off_l0)280205ed6bfSAndreas Jaekel zev_chksum_cache_get(uint8_t *dst,
281205ed6bfSAndreas Jaekel                      zev_sig_cache_file_t *file,
282205ed6bfSAndreas Jaekel                      zev_sig_cache_chksums_t *cs,
283205ed6bfSAndreas Jaekel                      uint64_t off_l0)
284205ed6bfSAndreas Jaekel {
285205ed6bfSAndreas Jaekel 	int idx;
286205ed6bfSAndreas Jaekel 
287205ed6bfSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
288205ed6bfSAndreas Jaekel 
289205ed6bfSAndreas Jaekel 	idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
290205ed6bfSAndreas Jaekel 	if (!memcmp(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH)) {
291205ed6bfSAndreas Jaekel 		zev_sig_cache.misses++;
292205ed6bfSAndreas Jaekel 		mutex_exit(&zev_sig_cache.mutex);
293205ed6bfSAndreas Jaekel 		return ENOENT;
294205ed6bfSAndreas Jaekel 	}
295205ed6bfSAndreas Jaekel 	memcpy(dst, cs->sigs[idx], SHA1_DIGEST_LENGTH);
296205ed6bfSAndreas Jaekel 	zev_sig_cache.hits++;
297205ed6bfSAndreas Jaekel 
298205ed6bfSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
299205ed6bfSAndreas Jaekel 	return 0;
300205ed6bfSAndreas Jaekel }
301205ed6bfSAndreas Jaekel 
302205ed6bfSAndreas Jaekel static void
zev_chksum_cache_put(uint8_t * sig,zev_sig_cache_file_t * file,zev_sig_cache_chksums_t * cs,uint64_t off_l0)303205ed6bfSAndreas Jaekel zev_chksum_cache_put(uint8_t *sig,
304205ed6bfSAndreas Jaekel                      zev_sig_cache_file_t *file,
305205ed6bfSAndreas Jaekel                      zev_sig_cache_chksums_t *cs,
306205ed6bfSAndreas Jaekel                      uint64_t off_l0)
307205ed6bfSAndreas Jaekel {
308205ed6bfSAndreas Jaekel 	zev_sig_cache_file_t *f;
309e5fbd3acSAndreas Jaekel 	zev_sig_cache_file_t *tmp;
310205ed6bfSAndreas Jaekel 	int idx;
311205ed6bfSAndreas Jaekel 
312205ed6bfSAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
313205ed6bfSAndreas Jaekel 
314205ed6bfSAndreas Jaekel 	if (zev_sig_cache.max_cache_size == 0) {
315205ed6bfSAndreas Jaekel 		/* cache disabled */
316205ed6bfSAndreas Jaekel 		mutex_exit(&zev_sig_cache.mutex);
317205ed6bfSAndreas Jaekel 		return;
318205ed6bfSAndreas Jaekel 	}
319205ed6bfSAndreas Jaekel 
320205ed6bfSAndreas Jaekel 	/* expire entries until there's room in the cache */
32110d7bc57SAndreas Jaekel 	f = list_tail(&zev_sig_cache.lru);
322e5fbd3acSAndreas Jaekel 	while (f && (zev_sig_cache.cache_size > zev_sig_cache.max_cache_size)){
323e5fbd3acSAndreas Jaekel 		tmp = f;
32410d7bc57SAndreas Jaekel 		f = list_prev(&zev_sig_cache.lru, f);
325e5fbd3acSAndreas Jaekel 		if (tmp->refcnt == 0)
326e5fbd3acSAndreas Jaekel 			zev_chksum_cache_file_free(tmp);
327205ed6bfSAndreas Jaekel 	}
328205ed6bfSAndreas Jaekel 
329205ed6bfSAndreas Jaekel 	idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
330205ed6bfSAndreas Jaekel 	memcpy(cs->sigs[idx], sig, SHA1_DIGEST_LENGTH);
331205ed6bfSAndreas Jaekel 
332205ed6bfSAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
333205ed6bfSAndreas Jaekel 	return;
334205ed6bfSAndreas Jaekel }
335205ed6bfSAndreas Jaekel 
336205ed6bfSAndreas Jaekel /* verbatim from zfs_vnops.c (unfortunatly it's declared static, there) */
337205ed6bfSAndreas Jaekel static int
mappedread(vnode_t * vp,int nbytes,uio_t * uio)338205ed6bfSAndreas Jaekel mappedread(vnode_t *vp, int nbytes, uio_t *uio)
339205ed6bfSAndreas Jaekel {
340205ed6bfSAndreas Jaekel 	znode_t *zp = VTOZ(vp);
341205ed6bfSAndreas Jaekel 	objset_t *os = zp->z_zfsvfs->z_os;
342205ed6bfSAndreas Jaekel 	int64_t	start, off;
343205ed6bfSAndreas Jaekel 	int len = nbytes;
344205ed6bfSAndreas Jaekel 	int error = 0;
345205ed6bfSAndreas Jaekel 
346205ed6bfSAndreas Jaekel 	start = uio->uio_loffset;
347205ed6bfSAndreas Jaekel 	off = start & PAGEOFFSET;
348205ed6bfSAndreas Jaekel 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
349205ed6bfSAndreas Jaekel 		page_t *pp;
350205ed6bfSAndreas Jaekel 		uint64_t bytes = MIN(PAGESIZE - off, len);
351205ed6bfSAndreas Jaekel 
352205ed6bfSAndreas Jaekel 		if (pp = page_lookup(vp, start, SE_SHARED)) {
353205ed6bfSAndreas Jaekel 			caddr_t va;
354205ed6bfSAndreas Jaekel 
355205ed6bfSAndreas Jaekel 			va = zfs_map_page(pp, S_READ);
356205ed6bfSAndreas Jaekel 			error = uiomove(va + off, bytes, UIO_READ, uio);
357205ed6bfSAndreas Jaekel 			zfs_unmap_page(pp, va);
358205ed6bfSAndreas Jaekel 			page_unlock(pp);
359205ed6bfSAndreas Jaekel 		} else {
360205ed6bfSAndreas Jaekel 			error = dmu_read_uio(os, zp->z_id, uio, bytes);
361205ed6bfSAndreas Jaekel 		}
362205ed6bfSAndreas Jaekel 		len -= bytes;
363205ed6bfSAndreas Jaekel 		off = 0;
364205ed6bfSAndreas Jaekel 		if (error)
365205ed6bfSAndreas Jaekel 			break;
366205ed6bfSAndreas Jaekel 	}
367205ed6bfSAndreas Jaekel 	return (error);
368205ed6bfSAndreas Jaekel }
369205ed6bfSAndreas Jaekel 
370205ed6bfSAndreas Jaekel static int
zev_safe_read(znode_t * zp,char * buf,uint64_t off,uint64_t len)371205ed6bfSAndreas Jaekel zev_safe_read(znode_t *zp, char *buf, uint64_t off, uint64_t len)
372205ed6bfSAndreas Jaekel {
373205ed6bfSAndreas Jaekel 	uio_t		uio;
374205ed6bfSAndreas Jaekel 	struct iovec	iov;
375205ed6bfSAndreas Jaekel 	ssize_t		n;
376205ed6bfSAndreas Jaekel 	ssize_t		nbytes;
377205ed6bfSAndreas Jaekel 	int		error = 0;
378205ed6bfSAndreas Jaekel 	vnode_t		*vp = ZTOV(zp);
379205ed6bfSAndreas Jaekel 	objset_t	*os = zp->z_zfsvfs->z_os;
380205ed6bfSAndreas Jaekel 
381205ed6bfSAndreas Jaekel 	/* set up uio */
382205ed6bfSAndreas Jaekel 
383205ed6bfSAndreas Jaekel 	iov.iov_base = buf;
384205ed6bfSAndreas Jaekel 	iov.iov_len = ZEV_L0_SIZE;
385205ed6bfSAndreas Jaekel 
386205ed6bfSAndreas Jaekel 	uio.uio_iov = &iov;
387205ed6bfSAndreas Jaekel 	uio.uio_iovcnt = 1;
388205ed6bfSAndreas Jaekel 	uio.uio_segflg = (short)UIO_SYSSPACE;
389205ed6bfSAndreas Jaekel 	uio.uio_llimit = RLIM64_INFINITY;
390205ed6bfSAndreas Jaekel 	uio.uio_fmode = FREAD;
391205ed6bfSAndreas Jaekel 	uio.uio_extflg = UIO_COPY_DEFAULT;
392205ed6bfSAndreas Jaekel 
393205ed6bfSAndreas Jaekel 	uio.uio_loffset = off;
394205ed6bfSAndreas Jaekel 	uio.uio_resid = len;
395205ed6bfSAndreas Jaekel 
396205ed6bfSAndreas Jaekel again:
397205ed6bfSAndreas Jaekel 	if (uio.uio_loffset >= zp->z_size)
398205ed6bfSAndreas Jaekel 		return EINVAL;
399205ed6bfSAndreas Jaekel 
400205ed6bfSAndreas Jaekel 	/* don't read past EOF */
401205ed6bfSAndreas Jaekel 	n = MIN(uio.uio_resid, zp->z_size - uio.uio_loffset);
402205ed6bfSAndreas Jaekel 
403205ed6bfSAndreas Jaekel 	/* this block was essentially copied from zfs_read() in zfs_vnops.c */
404205ed6bfSAndreas Jaekel 	while (n > 0) {
405205ed6bfSAndreas Jaekel 		nbytes = MIN(n, zfs_read_chunk_size -
406205ed6bfSAndreas Jaekel 		    P2PHASE(uio.uio_loffset, zfs_read_chunk_size));
407205ed6bfSAndreas Jaekel 
408205ed6bfSAndreas Jaekel 		if (vn_has_cached_data(vp)) {
409205ed6bfSAndreas Jaekel 			error = mappedread(vp, nbytes, &uio);
410205ed6bfSAndreas Jaekel 		} else {
411205ed6bfSAndreas Jaekel 			error = dmu_read_uio(os, zp->z_id, &uio, nbytes);
412205ed6bfSAndreas Jaekel 		}
413205ed6bfSAndreas Jaekel 		if (error) {
41441aa6ebdSSimon Klinkert 			if (error == EINTR)
415205ed6bfSAndreas Jaekel 				goto again;
416205ed6bfSAndreas Jaekel 			/* convert checksum errors into IO errors */
417205ed6bfSAndreas Jaekel 			if (error == ECKSUM)
418205ed6bfSAndreas Jaekel 				error = SET_ERROR(EIO);
419205ed6bfSAndreas Jaekel 			break;
420205ed6bfSAndreas Jaekel 		}
421205ed6bfSAndreas Jaekel 
422205ed6bfSAndreas Jaekel 		n -= nbytes;
423205ed6bfSAndreas Jaekel 	}
424205ed6bfSAndreas Jaekel 
425205ed6bfSAndreas Jaekel 	if (error)
426205ed6bfSAndreas Jaekel 		return error;
427205ed6bfSAndreas Jaekel 	return len - uio.uio_resid;
428205ed6bfSAndreas Jaekel }
429205ed6bfSAndreas Jaekel 
430205ed6bfSAndreas Jaekel static void
zev_l0_sig(uint8_t * sig,char * buf)431205ed6bfSAndreas Jaekel zev_l0_sig(uint8_t *sig, char *buf)
432205ed6bfSAndreas Jaekel {
433205ed6bfSAndreas Jaekel 	SHA1_CTX	ctx;
434205ed6bfSAndreas Jaekel 
435205ed6bfSAndreas Jaekel 	SHA1Init(&ctx);
436205ed6bfSAndreas Jaekel 	SHA1Update(&ctx, buf, ZEV_L0_SIZE);
437205ed6bfSAndreas Jaekel 	SHA1Final(sig, &ctx);
438205ed6bfSAndreas Jaekel 	return;
439205ed6bfSAndreas Jaekel }
440205ed6bfSAndreas Jaekel 
441205ed6bfSAndreas Jaekel static void
zev_l0_blocksig(uint8_t * blk_sig,uint8_t * l0_sig,uint8_t block_no)442205ed6bfSAndreas Jaekel zev_l0_blocksig(uint8_t *blk_sig, uint8_t *l0_sig, uint8_t block_no)
443205ed6bfSAndreas Jaekel {
444205ed6bfSAndreas Jaekel 	SHA1_CTX	ctx;
445205ed6bfSAndreas Jaekel 
446205ed6bfSAndreas Jaekel 	SHA1Init(&ctx);
447205ed6bfSAndreas Jaekel 	SHA1Update(&ctx, l0_sig, SHA1_DIGEST_LENGTH);
448205ed6bfSAndreas Jaekel 	SHA1Update(&ctx, &block_no, sizeof(block_no));
449205ed6bfSAndreas Jaekel 	SHA1Final(blk_sig, &ctx);
450205ed6bfSAndreas Jaekel 	return;
451205ed6bfSAndreas Jaekel }
452205ed6bfSAndreas Jaekel 
453205ed6bfSAndreas Jaekel static void
zev_l1_add(uint8_t * sig_l1,uint8_t * sig_l0)454205ed6bfSAndreas Jaekel zev_l1_add(uint8_t *sig_l1, uint8_t *sig_l0)
455205ed6bfSAndreas Jaekel {
456205ed6bfSAndreas Jaekel 	int	i;
457205ed6bfSAndreas Jaekel 	int	s;
458205ed6bfSAndreas Jaekel 	int	carry = 0;
459205ed6bfSAndreas Jaekel 
460205ed6bfSAndreas Jaekel 	for (i = SHA1_DIGEST_LENGTH - 1; i >= 0; --i) {
461205ed6bfSAndreas Jaekel 		s = sig_l1[i] + sig_l0[i] + carry;
462205ed6bfSAndreas Jaekel 		carry = s > 255 ? 1 : 0;
463205ed6bfSAndreas Jaekel 		sig_l1[i] = s & 0xff;
464205ed6bfSAndreas Jaekel 	}
465205ed6bfSAndreas Jaekel }
466205ed6bfSAndreas Jaekel 
46742110aacSAndreas Jaekel static int
zev_get_result_buffer(zev_sig_t ** buffer,uint64_t * buffer_len,uint64_t max_buffer_len,znode_t * zp,uint64_t off,uint64_t len,zev_chksum_mode_t mode)468205ed6bfSAndreas Jaekel zev_get_result_buffer(zev_sig_t **buffer,
469205ed6bfSAndreas Jaekel                       uint64_t *buffer_len,
47042110aacSAndreas Jaekel                       uint64_t max_buffer_len,
471205ed6bfSAndreas Jaekel                       znode_t *zp,
472205ed6bfSAndreas Jaekel                       uint64_t off,
473205ed6bfSAndreas Jaekel                       uint64_t len,
474205ed6bfSAndreas Jaekel                       zev_chksum_mode_t mode)
475205ed6bfSAndreas Jaekel {
476205ed6bfSAndreas Jaekel 	uint64_t	blk_start;
477205ed6bfSAndreas Jaekel 	uint64_t	blk_end;
478205ed6bfSAndreas Jaekel 	uint64_t	l0_blocks;
479205ed6bfSAndreas Jaekel 	uint64_t	l1_blocks;
480205ed6bfSAndreas Jaekel 	uint64_t	sigs;
481205ed6bfSAndreas Jaekel 	int buflen;
482205ed6bfSAndreas Jaekel 
483205ed6bfSAndreas Jaekel 	/* calculate result set size: how many checksums will we provide? */
484205ed6bfSAndreas Jaekel 
485205ed6bfSAndreas Jaekel 	ASSERT(len > 0 || (mode == zev_truncate && len == 0));
486205ed6bfSAndreas Jaekel 
487205ed6bfSAndreas Jaekel 	if (len == 0) {
488205ed6bfSAndreas Jaekel 		/* truncate */
489205ed6bfSAndreas Jaekel 		l0_blocks = ((off % ZEV_L0_SIZE) == 0) ? 0 : 1;
490205ed6bfSAndreas Jaekel 		l1_blocks = ((off % ZEV_L1_SIZE) == 0) ? 0 : 1;
491205ed6bfSAndreas Jaekel 	} else {
492205ed6bfSAndreas Jaekel 		/* how many lv1 checksums do we update? */
493205ed6bfSAndreas Jaekel 		blk_start = off / ZEV_L1_SIZE;
494205ed6bfSAndreas Jaekel 		blk_end = (off + len - 1) / ZEV_L1_SIZE;
495205ed6bfSAndreas Jaekel 		l1_blocks = blk_end - blk_start + 1;
496205ed6bfSAndreas Jaekel 		/* how many lv0 checksums do we update? */
497205ed6bfSAndreas Jaekel 		blk_start = off / ZEV_L0_SIZE;
498205ed6bfSAndreas Jaekel 		blk_end = (off + len - 1) / ZEV_L0_SIZE;
499205ed6bfSAndreas Jaekel 		l0_blocks = blk_end - blk_start + 1;
500205ed6bfSAndreas Jaekel 	}
501205ed6bfSAndreas Jaekel 
502205ed6bfSAndreas Jaekel 	sigs = l1_blocks + l0_blocks;
503205ed6bfSAndreas Jaekel 	if (sigs == 0) {
504205ed6bfSAndreas Jaekel 		*buffer = NULL;
505205ed6bfSAndreas Jaekel 		*buffer_len = 0;
50642110aacSAndreas Jaekel 		return 0;
507205ed6bfSAndreas Jaekel 	}
508205ed6bfSAndreas Jaekel 
509205ed6bfSAndreas Jaekel 	buflen = sigs * sizeof(zev_sig_t);
51042110aacSAndreas Jaekel 	if (max_buffer_len && (buflen > max_buffer_len)) {
51142110aacSAndreas Jaekel 		*buffer = NULL;
51242110aacSAndreas Jaekel 		*buffer_len = 0;
51342110aacSAndreas Jaekel 		return ENOSPC;
51442110aacSAndreas Jaekel 	}
515205ed6bfSAndreas Jaekel 	*buffer_len = buflen;
516205ed6bfSAndreas Jaekel 	*buffer = zev_alloc(buflen);
51742110aacSAndreas Jaekel 	return 0;
518205ed6bfSAndreas Jaekel }
519205ed6bfSAndreas Jaekel 
520205ed6bfSAndreas Jaekel static void
zev_append_sig(zev_sig_t * s,int level,uint64_t off,uint8_t * sig)521205ed6bfSAndreas Jaekel zev_append_sig(zev_sig_t *s, int level, uint64_t off, uint8_t *sig)
522205ed6bfSAndreas Jaekel {
523205ed6bfSAndreas Jaekel 	s->level = level;
524205ed6bfSAndreas Jaekel 	s->block_offset = off;
525205ed6bfSAndreas Jaekel 	memcpy(s->value, sig, SHA1_DIGEST_LENGTH);
526205ed6bfSAndreas Jaekel }
527205ed6bfSAndreas Jaekel 
528205ed6bfSAndreas Jaekel /*
529205ed6bfSAndreas Jaekel  * Calculate all l0 and l1 checksums that are affected by the given range.
530205ed6bfSAndreas Jaekel  *
531205ed6bfSAndreas Jaekel  * This function assumes that the ranges it needs to read are already
532205ed6bfSAndreas Jaekel  * range-locked.
533205ed6bfSAndreas Jaekel  */
534205ed6bfSAndreas Jaekel int
zev_get_checksums(zev_sig_t ** result,uint64_t * result_buf_len,uint64_t * signature_cnt,uint64_t max_result_len,znode_t * zp,uint64_t off,uint64_t len,zev_chksum_mode_t mode)535205ed6bfSAndreas Jaekel zev_get_checksums(zev_sig_t **result,
536205ed6bfSAndreas Jaekel                   uint64_t *result_buf_len,
537205ed6bfSAndreas Jaekel                   uint64_t *signature_cnt,
53842110aacSAndreas Jaekel                   uint64_t max_result_len,
539205ed6bfSAndreas Jaekel                   znode_t *zp,
540205ed6bfSAndreas Jaekel                   uint64_t off,
541205ed6bfSAndreas Jaekel                   uint64_t len,
542205ed6bfSAndreas Jaekel                   zev_chksum_mode_t mode)
543205ed6bfSAndreas Jaekel {
544205ed6bfSAndreas Jaekel 	uint64_t	off_l1;
545205ed6bfSAndreas Jaekel 	uint64_t	len_l1;
546205ed6bfSAndreas Jaekel 	uint64_t	pos_l1;
547205ed6bfSAndreas Jaekel 	uint64_t	pos_l0;
548205ed6bfSAndreas Jaekel 	char		*buf;
549205ed6bfSAndreas Jaekel 	int64_t		ret;
550205ed6bfSAndreas Jaekel 	uint8_t		sig_l0[SHA1_DIGEST_LENGTH];
551205ed6bfSAndreas Jaekel 	uint8_t		blk_sig_l0[SHA1_DIGEST_LENGTH];
552205ed6bfSAndreas Jaekel 	uint8_t		sig_l1[SHA1_DIGEST_LENGTH];
553205ed6bfSAndreas Jaekel 	uint8_t		l0_block_no;
554205ed6bfSAndreas Jaekel 	zev_sig_t	*sig;
555205ed6bfSAndreas Jaekel 	int		non_empty_l0_blocks;
556205ed6bfSAndreas Jaekel 	zev_sig_cache_file_t *file;
557205ed6bfSAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
558205ed6bfSAndreas Jaekel 
559205ed6bfSAndreas Jaekel 	/*
560205ed6bfSAndreas Jaekel 	 * Note: for write events, the callback is called via
561205ed6bfSAndreas Jaekel 	 *    zfs_write() -> zfs_log_write() -> zev_znode_write_cb()
562205ed6bfSAndreas Jaekel 	 *
563205ed6bfSAndreas Jaekel 	 * The transaction is not commited, yet.
564205ed6bfSAndreas Jaekel 	 *
565205ed6bfSAndreas Jaekel 	 * A write() syscall might be split into smaller chunks by zfs_write()
566205ed6bfSAndreas Jaekel 	 *
567205ed6bfSAndreas Jaekel 	 * zfs_write() has a range lock when this is called. (zfs_vnops.c:925)
568205ed6bfSAndreas Jaekel 	 * In zev mode, the range lock will encompass all data we need
569205ed6bfSAndreas Jaekel 	 * to calculate our checksums.
570205ed6bfSAndreas Jaekel 	 *
571205ed6bfSAndreas Jaekel 	 * The same is true for truncates with non-zero length. ("punch hole")
572205ed6bfSAndreas Jaekel 	 */
573205ed6bfSAndreas Jaekel 
574205ed6bfSAndreas Jaekel 	ASSERT(len > 0 || (mode == zev_truncate && len == 0));
575205ed6bfSAndreas Jaekel 	*signature_cnt = 0;
576205ed6bfSAndreas Jaekel 
577e36b97c7SAndreas Jaekel 	/*
578e36b97c7SAndreas Jaekel 	 * Under certain circumstances we need the first l0 block's
579e36b97c7SAndreas Jaekel 	 * checksum, because we didn't store it in the database and
580e36b97c7SAndreas Jaekel 	 * can't easily get it from userspace.  Not for this exact point
581e36b97c7SAndreas Jaekel 	 * in time, anyway.  So we cheat a little.
582e36b97c7SAndreas Jaekel 	 */
583e36b97c7SAndreas Jaekel 	if (mode == zev_truncate && len == 0 && off == 4096) {
584e36b97c7SAndreas Jaekel 		/*
585e36b97c7SAndreas Jaekel 		 * Normally, we'd report no checkums:
586e36b97c7SAndreas Jaekel 		 *  - no l0 sum, because no remaining l0 block is changed
587e36b97c7SAndreas Jaekel 		 *  - no l1 sum, because the file is now too short for l1 sums
588e36b97c7SAndreas Jaekel 		 * Let's pretend we changed the first l0 block, then.
589e36b97c7SAndreas Jaekel 		 * Luckily the entire file is range locked during truncate().
590e36b97c7SAndreas Jaekel 		 */
591e36b97c7SAndreas Jaekel 		off = 0;
592e36b97c7SAndreas Jaekel 		len = 4096;
593e36b97c7SAndreas Jaekel 	}
594e36b97c7SAndreas Jaekel 
595205ed6bfSAndreas Jaekel 	/* start of this megabyte */
596205ed6bfSAndreas Jaekel 	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
597205ed6bfSAndreas Jaekel 	/* full megabytes */
598205ed6bfSAndreas Jaekel 	if (len == 0) {
599205ed6bfSAndreas Jaekel 		/* truncate(): we'll look at the last lv1 block, only. */
600205ed6bfSAndreas Jaekel 		len_l1 = ZEV_L1_SIZE;
601205ed6bfSAndreas Jaekel 	} else {
602205ed6bfSAndreas Jaekel 		len_l1 = len + (off - off_l1);
603205ed6bfSAndreas Jaekel 		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
604205ed6bfSAndreas Jaekel 	}
605205ed6bfSAndreas Jaekel 
606205ed6bfSAndreas Jaekel 	file = zev_chksum_cache_file_get_and_hold(zp);
607205ed6bfSAndreas Jaekel 	zev_chksum_cache_invalidate(file, zp, mode, off, len);
608205ed6bfSAndreas Jaekel 	buf = zev_alloc(ZEV_L0_SIZE);
609205ed6bfSAndreas Jaekel 
61042110aacSAndreas Jaekel 	ret = zev_get_result_buffer(result, result_buf_len, max_result_len,
61142110aacSAndreas Jaekel 	                            zp, off, len, mode);
61242110aacSAndreas Jaekel 	if (ret) {
61342110aacSAndreas Jaekel 		zev_free(buf, ZEV_L0_SIZE);
61442110aacSAndreas Jaekel 		zev_chksum_cache_file_release(file);
61542110aacSAndreas Jaekel 		return ret;
61642110aacSAndreas Jaekel 	}
617205ed6bfSAndreas Jaekel 	if (*result == NULL) {
618205ed6bfSAndreas Jaekel 		/* we're done */
619205ed6bfSAndreas Jaekel 		zev_free(buf, ZEV_L0_SIZE);
620205ed6bfSAndreas Jaekel 		zev_chksum_cache_file_release(file);
621205ed6bfSAndreas Jaekel 		return 0;
622205ed6bfSAndreas Jaekel 	}
623205ed6bfSAndreas Jaekel 	sig = *result;
624205ed6bfSAndreas Jaekel 
625205ed6bfSAndreas Jaekel 	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {
626205ed6bfSAndreas Jaekel 
627205ed6bfSAndreas Jaekel 		if (pos_l1 > zp->z_size) {
628205ed6bfSAndreas Jaekel 			cmn_err(CE_WARN, "zev_get_checksums: off+len beyond "
629a7dcf41bSSimon Klinkert 			        "EOF. Unexpected behaviour; please fix! "
630a7dcf41bSSimon Klinkert 				"off=%" PRIu64 ", len=%" PRIu64 ", "
631a7dcf41bSSimon Klinkert 			        "dataset='%s', inode=%" PRIu64, off, len,
632a7dcf41bSSimon Klinkert 				zp->z_zfsvfs->z_os->
633e0628a6fSAndreas Jaekel 			        os_dsl_dataset->ds_dir->dd_myname, zp->z_id);
634e0628a6fSAndreas Jaekel 			zev_free(*result, *result_buf_len);
635e0628a6fSAndreas Jaekel 			*result = NULL;
636*6d08434eSSimon Klinkert 			zev_free(buf, ZEV_L0_SIZE);
637*6d08434eSSimon Klinkert 			zev_chksum_cache_file_release(file);
638*6d08434eSSimon Klinkert 			return EIO;
639205ed6bfSAndreas Jaekel 		}
640205ed6bfSAndreas Jaekel 
641205ed6bfSAndreas Jaekel 		/*
642205ed6bfSAndreas Jaekel 		 * Since we have a reference to 'file' 'cs' can't be expired.
643205ed6bfSAndreas Jaekel 		 * Since our ranges are range locked, other threads won't
644205ed6bfSAndreas Jaekel 		 * touch our checksum entries. (not even read them)
645205ed6bfSAndreas Jaekel 		 * Hence, we don't need to hold() or release() 'cs'.
646205ed6bfSAndreas Jaekel 		 */
647205ed6bfSAndreas Jaekel 		cs = zev_chksum_cache_get_lv1_entry(file, pos_l1);
648205ed6bfSAndreas Jaekel 
649205ed6bfSAndreas Jaekel 		l0_block_no = 0;
650205ed6bfSAndreas Jaekel 		non_empty_l0_blocks = 0;
651205ed6bfSAndreas Jaekel 		bzero(sig_l1, sizeof(sig_l1));
652205ed6bfSAndreas Jaekel 		for (pos_l0 = pos_l1;
653205ed6bfSAndreas Jaekel 		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
654205ed6bfSAndreas Jaekel 		     pos_l0 += ZEV_L0_SIZE){
655205ed6bfSAndreas Jaekel 
656205ed6bfSAndreas Jaekel 			if (pos_l0 >= zp->z_size)
657205ed6bfSAndreas Jaekel 				break;	/* EOF */
658205ed6bfSAndreas Jaekel 
659205ed6bfSAndreas Jaekel 			if (zev_chksum_cache_get(sig_l0, file,cs,pos_l0) != 0) {
660205ed6bfSAndreas Jaekel 
661205ed6bfSAndreas Jaekel 				/* signature is not cached, yet. */
662205ed6bfSAndreas Jaekel 				ret = zev_safe_read(zp, buf,
663205ed6bfSAndreas Jaekel 				                    pos_l0, ZEV_L0_SIZE);
664205ed6bfSAndreas Jaekel 				if (ret < 0) {
665205ed6bfSAndreas Jaekel 					zev_free(*result, *result_buf_len);
666205ed6bfSAndreas Jaekel 					zev_free(buf, ZEV_L0_SIZE);
667205ed6bfSAndreas Jaekel 					zev_chksum_cache_file_release(file);
668205ed6bfSAndreas Jaekel 					return ret;
669205ed6bfSAndreas Jaekel 				}
670205ed6bfSAndreas Jaekel 				/* pad buffer with zeros if necessary */
671205ed6bfSAndreas Jaekel 				if (ret < ZEV_L0_SIZE)
672205ed6bfSAndreas Jaekel 					bzero(buf + ret, ZEV_L0_SIZE - ret);
673205ed6bfSAndreas Jaekel 
674205ed6bfSAndreas Jaekel 				/* calculate signature */
675205ed6bfSAndreas Jaekel 				zev_l0_sig(sig_l0, buf);
676205ed6bfSAndreas Jaekel 
677205ed6bfSAndreas Jaekel 				zev_chksum_cache_put(sig_l0, file, cs, pos_l0);
678205ed6bfSAndreas Jaekel 			}
679205ed6bfSAndreas Jaekel 
680205ed6bfSAndreas Jaekel 			if (!memcmp(sig_l0, all_zero_sig, SHA1_DIGEST_LENGTH)) {
681205ed6bfSAndreas Jaekel 				/* all-zero l0 block.  omit signature. */
682205ed6bfSAndreas Jaekel 				l0_block_no++;
683205ed6bfSAndreas Jaekel 				continue;
684205ed6bfSAndreas Jaekel 			}
685205ed6bfSAndreas Jaekel 			non_empty_l0_blocks++;
686205ed6bfSAndreas Jaekel 			zev_l0_blocksig(blk_sig_l0, sig_l0, l0_block_no);
687205ed6bfSAndreas Jaekel 			zev_l1_add(sig_l1, blk_sig_l0);
688205ed6bfSAndreas Jaekel 
689205ed6bfSAndreas Jaekel 			if (((pos_l0 + ZEV_L0_SIZE - 1) >= off) &&
6900b3245eaSAndreas Jaekel 			    (pos_l0 <= (off + len - 1))) {
691205ed6bfSAndreas Jaekel 				zev_append_sig(sig++, 0, pos_l0, sig_l0);
692205ed6bfSAndreas Jaekel 			}
693205ed6bfSAndreas Jaekel 
694205ed6bfSAndreas Jaekel 			l0_block_no++;
695205ed6bfSAndreas Jaekel 		}
696205ed6bfSAndreas Jaekel 
697205ed6bfSAndreas Jaekel 		if (non_empty_l0_blocks && (zp->z_size > ZEV_L0_SIZE))
698205ed6bfSAndreas Jaekel 			zev_append_sig(sig++, 1, pos_l1, sig_l1);
699205ed6bfSAndreas Jaekel 	}
700205ed6bfSAndreas Jaekel 
701205ed6bfSAndreas Jaekel 	*signature_cnt = ((char *)sig - (char *)*result) / sizeof(zev_sig_t);
702205ed6bfSAndreas Jaekel 
703205ed6bfSAndreas Jaekel 	zev_free(buf, ZEV_L0_SIZE);
704205ed6bfSAndreas Jaekel 	zev_chksum_cache_file_release(file);
705205ed6bfSAndreas Jaekel 	return 0;
706205ed6bfSAndreas Jaekel }
70742110aacSAndreas Jaekel 
70842110aacSAndreas Jaekel int
zev_ioc_get_signatures(intptr_t arg,int mode)70942110aacSAndreas Jaekel zev_ioc_get_signatures(intptr_t arg, int mode)
71042110aacSAndreas Jaekel {
71142110aacSAndreas Jaekel 	zev_ioctl_get_signatures_t gs;
71242110aacSAndreas Jaekel 	file_t *fp;
71342110aacSAndreas Jaekel 	int ret = 0;
71442110aacSAndreas Jaekel 	znode_t *zp;
71542110aacSAndreas Jaekel 	zev_sig_t *sig_buf = NULL;
71642110aacSAndreas Jaekel 	uint64_t sig_buf_len;
71742110aacSAndreas Jaekel 	uint64_t sig_cnt = 0;
71842110aacSAndreas Jaekel 	uint64_t sig_len;
71942110aacSAndreas Jaekel 	char *dst;
72042110aacSAndreas Jaekel 	int range_locked = 0;
72142110aacSAndreas Jaekel 	rl_t *rl;
72242110aacSAndreas Jaekel 	ssize_t	lock_off;
72342110aacSAndreas Jaekel 	ssize_t lock_len;
72415377986SAndreas Jaekel 	struct zfsvfs *zfsvfs = NULL;
72542110aacSAndreas Jaekel 
72642110aacSAndreas Jaekel 	if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
72742110aacSAndreas Jaekel 		return EFAULT;
72842110aacSAndreas Jaekel 	fp = getf(gs.zev_fd);
72942110aacSAndreas Jaekel 	if (fp == NULL)
73042110aacSAndreas Jaekel 		return EBADF;
73142110aacSAndreas Jaekel 	if (fp->f_vnode->v_vfsp->vfs_fstype != zfsfstype) {
73242110aacSAndreas Jaekel 		ret = EINVAL;
73342110aacSAndreas Jaekel 		goto out;
73442110aacSAndreas Jaekel 	}
73515377986SAndreas Jaekel 	zp = VTOZ(fp->f_vnode);
73615377986SAndreas Jaekel 
73715377986SAndreas Jaekel 	/* modified version of ZFS_ENTER() macro - we need to clean up fp */
73815377986SAndreas Jaekel 	zfsvfs = zp->z_zfsvfs;
73915377986SAndreas Jaekel 	rrm_enter_read(&zfsvfs->z_teardown_lock, FTAG);
74015377986SAndreas Jaekel 	if (zp->z_zfsvfs->z_unmounted) {
74115377986SAndreas Jaekel 		ret = EIO;
74215377986SAndreas Jaekel 		goto out;
74315377986SAndreas Jaekel 	}
74415377986SAndreas Jaekel 	/* modified version of ZFS_VERIFY_ZP() macro */
74515377986SAndreas Jaekel 	if (zp->z_sa_hdl == NULL) {
74615377986SAndreas Jaekel 		ret = EIO;
74715377986SAndreas Jaekel 		goto out;
74815377986SAndreas Jaekel 	}
74915377986SAndreas Jaekel 
75042110aacSAndreas Jaekel 	if (fp->f_vnode->v_type != VREG) {
75142110aacSAndreas Jaekel 		ret = EINVAL;
75242110aacSAndreas Jaekel 		goto out;
75342110aacSAndreas Jaekel 	}
75442110aacSAndreas Jaekel 	if (gs.zev_offset >= zp->z_size) {
75542110aacSAndreas Jaekel 		ret = EINVAL;
75642110aacSAndreas Jaekel 		goto out;
75742110aacSAndreas Jaekel 	}
75842110aacSAndreas Jaekel 
75942110aacSAndreas Jaekel 	/* range lock data */
76042110aacSAndreas Jaekel 	lock_off = P2ALIGN(gs.zev_offset, ZEV_L1_SIZE);
76142110aacSAndreas Jaekel 	lock_len = gs.zev_len + (gs.zev_offset - lock_off);
76242110aacSAndreas Jaekel 	lock_len = P2ROUNDUP(lock_len, ZEV_L1_SIZE);
76342110aacSAndreas Jaekel 	rl = zfs_range_lock(zp, lock_off, lock_len, RL_READER);
76442110aacSAndreas Jaekel 	range_locked = 1;
76542110aacSAndreas Jaekel 
76642110aacSAndreas Jaekel 	/* get checksums */
76742110aacSAndreas Jaekel 	ret = zev_get_checksums(&sig_buf, &sig_buf_len, &sig_cnt,
76842110aacSAndreas Jaekel 	                        gs.zev_bufsize,
76942110aacSAndreas Jaekel 	                        zp, gs.zev_offset, gs.zev_len, zev_write);
77042110aacSAndreas Jaekel 	if (ret)
77142110aacSAndreas Jaekel 		goto out;
77242110aacSAndreas Jaekel 
77342110aacSAndreas Jaekel 	/* copy to userland */
77442110aacSAndreas Jaekel 	sig_len = sig_cnt * sizeof(zev_sig_t);
77542110aacSAndreas Jaekel 	gs.zev_signature_cnt = sig_cnt;
77642110aacSAndreas Jaekel 	if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) {
77742110aacSAndreas Jaekel 		ret = EFAULT;
77842110aacSAndreas Jaekel 		goto out;
77942110aacSAndreas Jaekel 	}
78042110aacSAndreas Jaekel 	if (sig_cnt && sig_buf) {
78142110aacSAndreas Jaekel 		dst = (char *)arg + sizeof(gs);
78242110aacSAndreas Jaekel 		if (ddi_copyout(sig_buf, (void *)dst, sig_len, mode) != 0) {
78342110aacSAndreas Jaekel 			ret = EFAULT;
78442110aacSAndreas Jaekel 			goto out;
78542110aacSAndreas Jaekel 		}
78642110aacSAndreas Jaekel 	}
78742110aacSAndreas Jaekel out:
78842110aacSAndreas Jaekel 	if (sig_buf)
78942110aacSAndreas Jaekel 		zev_free(sig_buf, sig_buf_len);
79042110aacSAndreas Jaekel 	if (range_locked)
79142110aacSAndreas Jaekel 		zfs_range_unlock(rl);
79215377986SAndreas Jaekel 	if (zfsvfs)
79315377986SAndreas Jaekel 		ZFS_EXIT(zfsvfs);
79442110aacSAndreas Jaekel 	releasef(gs.zev_fd);
79542110aacSAndreas Jaekel 	return ret;
79642110aacSAndreas Jaekel }
79742110aacSAndreas Jaekel 
7982eabeab5SAndreas Jaekel void
zev_symlink_checksum(zev_znode_symlink_t * rec,char * link)7992eabeab5SAndreas Jaekel zev_symlink_checksum(zev_znode_symlink_t *rec, char *link)
8002eabeab5SAndreas Jaekel {
8012eabeab5SAndreas Jaekel 	char buf[ZEV_L0_SIZE];
8022eabeab5SAndreas Jaekel 
8032eabeab5SAndreas Jaekel 	memset(buf, 0, sizeof(buf));
8042eabeab5SAndreas Jaekel 	strcpy(buf, link);
8052eabeab5SAndreas Jaekel 	zev_l0_sig(rec->signature.value, buf);
8062eabeab5SAndreas Jaekel 	rec->signature.level = 0;
8072eabeab5SAndreas Jaekel 	rec->signature.block_offset = 0;
8082eabeab5SAndreas Jaekel }
8092eabeab5SAndreas Jaekel 
8102eabeab5SAndreas Jaekel 
8112eabeab5SAndreas Jaekel void
zev_create_checksum(zev_znode_create_t * rec,znode_t * zp)8122eabeab5SAndreas Jaekel zev_create_checksum(zev_znode_create_t *rec, znode_t *zp)
8132eabeab5SAndreas Jaekel {
8142eabeab5SAndreas Jaekel 	char buf[ZEV_L0_SIZE];
8152eabeab5SAndreas Jaekel 	vnode_t *vp;
8162eabeab5SAndreas Jaekel 	uint64_t rdev;
8172eabeab5SAndreas Jaekel 
8182eabeab5SAndreas Jaekel 	vp = ZTOV(zp);
8192eabeab5SAndreas Jaekel 	if (vp->v_type == VBLK || vp->v_type == VCHR) {
8202eabeab5SAndreas Jaekel 		sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zp->z_zfsvfs),
8212eabeab5SAndreas Jaekel 		          &rdev, sizeof(rdev));
8222eabeab5SAndreas Jaekel 		memset(buf, 0, sizeof(buf));
8232eabeab5SAndreas Jaekel 		snprintf(buf, sizeof(buf), "%c%d,%d",
8242eabeab5SAndreas Jaekel 		         vp->v_type == VBLK ? 'b' : 'c',
8252eabeab5SAndreas Jaekel 		         getmajor(rdev),
8262eabeab5SAndreas Jaekel 		         getminor(rdev));
8272eabeab5SAndreas Jaekel 		zev_l0_sig(rec->signature.value, buf);
8282eabeab5SAndreas Jaekel 	} else {
8292eabeab5SAndreas Jaekel 		memset(rec->signature.value, 0, sizeof(rec->signature.value));
8302eabeab5SAndreas Jaekel 	}
8312eabeab5SAndreas Jaekel 	rec->signature.level = 0;
8322eabeab5SAndreas Jaekel 	rec->signature.block_offset = 0;
8332eabeab5SAndreas Jaekel }
8342eabeab5SAndreas Jaekel 
835