xref: /titanic_52/usr/src/uts/common/fs/zev/zev_checksums.c (revision 4053e9022400086fffd6baa9c383598624ddcedc)
15e286361SAndreas Jaekel #include <sys/zfs_events.h>
25e286361SAndreas Jaekel #include <sys/zev_checksums.h>
35e286361SAndreas Jaekel #include <sys/fs/zev.h>
45e286361SAndreas Jaekel #include <sys/zfs_znode.h>
55e286361SAndreas Jaekel #include <sys/sha1.h>
65e286361SAndreas Jaekel #include <sys/avl.h>
75e286361SAndreas Jaekel #include <sys/sysmacros.h>
85e286361SAndreas Jaekel #include <sys/fs/zev.h>
9b9710123SAndreas Jaekel #include <sys/zfs_rlock.h>
1080d9297cSAndreas Jaekel #include <sys/list.h>
115e286361SAndreas Jaekel 
125e286361SAndreas Jaekel typedef struct zev_sig_cache_chksums_t {
135e286361SAndreas Jaekel 	/* begin of key */
145e286361SAndreas Jaekel 	uint64_t			offset_l1;
155e286361SAndreas Jaekel 	/* end of key */
165e286361SAndreas Jaekel 	avl_node_t			avl_node;
175e286361SAndreas Jaekel 	uint8_t		sigs[ZEV_L1_SIZE/ZEV_L0_SIZE][SHA1_DIGEST_LENGTH];
185e286361SAndreas Jaekel } zev_sig_cache_chksums_t;
195e286361SAndreas Jaekel 
205e286361SAndreas Jaekel typedef struct zev_sig_cache_file_t {
215e286361SAndreas Jaekel 	/* begin of key */
225e286361SAndreas Jaekel 	uint64_t			guid;
235e286361SAndreas Jaekel 	uint64_t			ino;
245e286361SAndreas Jaekel 	uint64_t			gen;
255e286361SAndreas Jaekel 	/* end of key */
265e286361SAndreas Jaekel 	uint32_t			refcnt;
2780d9297cSAndreas Jaekel 	list_node_t			lru_node;
285e286361SAndreas Jaekel 	avl_node_t			avl_node;
295e286361SAndreas Jaekel 	avl_tree_t			chksums;
305e286361SAndreas Jaekel } zev_sig_cache_file_t;
315e286361SAndreas Jaekel 
325e286361SAndreas Jaekel typedef struct zev_sig_cache_t {
335e286361SAndreas Jaekel 	kmutex_t			mutex;
345e286361SAndreas Jaekel 	uint64_t			cache_size;
355e286361SAndreas Jaekel 	uint64_t			max_cache_size;
365e286361SAndreas Jaekel 	uint64_t			hits;
375e286361SAndreas Jaekel 	uint64_t			misses;
3880d9297cSAndreas Jaekel 	list_t				lru;
395e286361SAndreas Jaekel 	avl_tree_t			files;
405e286361SAndreas Jaekel } zev_sig_cache_t;
415e286361SAndreas Jaekel 
425e286361SAndreas Jaekel extern offset_t zfs_read_chunk_size;	/* tuneable from zfs_vnops.c */
435e286361SAndreas Jaekel 
445e286361SAndreas Jaekel static uint8_t all_zero_sig[SHA1_DIGEST_LENGTH] = {
455e286361SAndreas Jaekel 	0x1c, 0xea, 0xf7, 0x3d, 0xf4, 0x0e, 0x53, 0x1d, 0xf3, 0xbf,
465e286361SAndreas Jaekel 	0xb2, 0x6b, 0x4f, 0xb7, 0xcd, 0x95, 0xfb, 0x7b, 0xff, 0x1d
475e286361SAndreas Jaekel };
485e286361SAndreas Jaekel 
495e286361SAndreas Jaekel static uint8_t unknown_sig[SHA1_DIGEST_LENGTH] = {
505e286361SAndreas Jaekel 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
515e286361SAndreas Jaekel 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
525e286361SAndreas Jaekel };
535e286361SAndreas Jaekel 
545e286361SAndreas Jaekel static zev_sig_cache_t	zev_sig_cache;
555e286361SAndreas Jaekel 
565e286361SAndreas Jaekel static int
575e286361SAndreas Jaekel zev_cache_file_cmp(const void *entry_a, const void *entry_b)
585e286361SAndreas Jaekel {
595e286361SAndreas Jaekel 	const zev_sig_cache_file_t *a = entry_a;
605e286361SAndreas Jaekel 	const zev_sig_cache_file_t *b = entry_b;
615e286361SAndreas Jaekel 
625e286361SAndreas Jaekel 	if (a->guid < b->guid)
635e286361SAndreas Jaekel 		return -1;
645e286361SAndreas Jaekel 	if (a->guid > b->guid)
655e286361SAndreas Jaekel 		return 1;
665e286361SAndreas Jaekel 	if (a->ino < b->ino)
675e286361SAndreas Jaekel 		return -1;
685e286361SAndreas Jaekel 	if (a->ino > b->ino)
695e286361SAndreas Jaekel 		return 1;
705e286361SAndreas Jaekel 	if (a->gen < b->gen)
715e286361SAndreas Jaekel 		return -1;
725e286361SAndreas Jaekel 	if (a->gen > b->gen)
735e286361SAndreas Jaekel 		return 1;
745e286361SAndreas Jaekel 	return 0;
755e286361SAndreas Jaekel }
765e286361SAndreas Jaekel 
775e286361SAndreas Jaekel static int
785e286361SAndreas Jaekel zev_chksum_cache_cmp(const void *entry_a, const void *entry_b)
795e286361SAndreas Jaekel {
805e286361SAndreas Jaekel 	const zev_sig_cache_chksums_t *a = entry_a;
815e286361SAndreas Jaekel 	const zev_sig_cache_chksums_t *b = entry_b;
825e286361SAndreas Jaekel 
835e286361SAndreas Jaekel 	if (a->offset_l1 < b->offset_l1)
845e286361SAndreas Jaekel 		return -1;
855e286361SAndreas Jaekel 	if (a->offset_l1 > b->offset_l1)
865e286361SAndreas Jaekel 		return 1;
875e286361SAndreas Jaekel 	return 0;
885e286361SAndreas Jaekel }
895e286361SAndreas Jaekel 
905e286361SAndreas Jaekel /* must be called with zev_sig_cache.mutex held */
915e286361SAndreas Jaekel static void
925e286361SAndreas Jaekel zev_chksum_cache_file_free(zev_sig_cache_file_t *file)
935e286361SAndreas Jaekel {
945e286361SAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
955e286361SAndreas Jaekel 	void *c = NULL; /* cookie */
965e286361SAndreas Jaekel 
975e286361SAndreas Jaekel 	/* remove from lru list */
9880d9297cSAndreas Jaekel 	list_remove(&zev_sig_cache.lru, file);
995e286361SAndreas Jaekel 	/* free resources */
1005e286361SAndreas Jaekel 	avl_remove(&zev_sig_cache.files, file);
1015e286361SAndreas Jaekel 	while ((cs = avl_destroy_nodes(&file->chksums, &c)) != NULL) {
1025e286361SAndreas Jaekel 		zev_sig_cache.cache_size -= sizeof(*cs);
1035e286361SAndreas Jaekel 		zev_free(cs, sizeof(*cs));
1045e286361SAndreas Jaekel 	}
1055e286361SAndreas Jaekel 	avl_destroy(&file->chksums);
1065e286361SAndreas Jaekel 	zev_free(file, sizeof(*file));
1075e286361SAndreas Jaekel 	zev_sig_cache.cache_size -= sizeof(*file);
1085e286361SAndreas Jaekel }
1095e286361SAndreas Jaekel 
1105e286361SAndreas Jaekel void
1115e286361SAndreas Jaekel zev_chksum_init(void)
1125e286361SAndreas Jaekel {
1135e286361SAndreas Jaekel 	memset(&zev_sig_cache, 0, sizeof(zev_sig_cache));
1145e286361SAndreas Jaekel 	mutex_init(&zev_sig_cache.mutex, NULL, MUTEX_DRIVER, NULL);
1155e286361SAndreas Jaekel 	avl_create(&zev_sig_cache.files, zev_cache_file_cmp,
1165e286361SAndreas Jaekel 	           sizeof(zev_sig_cache_file_t),
1175e286361SAndreas Jaekel 	           offsetof(zev_sig_cache_file_t, avl_node));
11880d9297cSAndreas Jaekel 	list_create(&zev_sig_cache.lru,
11980d9297cSAndreas Jaekel 	            sizeof(zev_sig_cache_file_t),
12080d9297cSAndreas Jaekel 	            offsetof(zev_sig_cache_file_t, lru_node));
1215e286361SAndreas Jaekel 	zev_sig_cache.max_cache_size = ZEV_CHKSUM_DEFAULT_CACHE_SIZE;
1225e286361SAndreas Jaekel }
1235e286361SAndreas Jaekel 
1245e286361SAndreas Jaekel void
1255e286361SAndreas Jaekel zev_chksum_fini(void)
1265e286361SAndreas Jaekel {
1275e286361SAndreas Jaekel 	zev_sig_cache_file_t *file;
1285e286361SAndreas Jaekel 
1295e286361SAndreas Jaekel 	mutex_destroy(&zev_sig_cache.mutex);
1305e286361SAndreas Jaekel 	while ((file = avl_first(&zev_sig_cache.files)) != NULL)
1315e286361SAndreas Jaekel 		zev_chksum_cache_file_free(file);
13280d9297cSAndreas Jaekel 	list_destroy(&zev_sig_cache.lru);
1335e286361SAndreas Jaekel 	avl_destroy(&zev_sig_cache.files);
1345e286361SAndreas Jaekel }
1355e286361SAndreas Jaekel 
1365e286361SAndreas Jaekel static zev_sig_cache_file_t *
1375e286361SAndreas Jaekel zev_chksum_cache_file_get_and_hold(znode_t *zp)
1385e286361SAndreas Jaekel {
1395e286361SAndreas Jaekel 	zev_sig_cache_file_t find_file;
1405e286361SAndreas Jaekel 	zev_sig_cache_file_t *file;
1415e286361SAndreas Jaekel 	avl_index_t where;
1425e286361SAndreas Jaekel 
14316a2f000SSimon Klinkert 	find_file.guid =
14416a2f000SSimon Klinkert 		dsl_dataset_phys(zp->z_zfsvfs->z_os->os_dsl_dataset)->ds_guid;
1455e286361SAndreas Jaekel 	find_file.ino = zp->z_id;
1465e286361SAndreas Jaekel 	find_file.gen = zp->z_gen;
1475e286361SAndreas Jaekel 
1485e286361SAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
1495e286361SAndreas Jaekel 	file = avl_find(&zev_sig_cache.files, &find_file, &where);
1505e286361SAndreas Jaekel 	if (!file) {
1515e286361SAndreas Jaekel 		file = zev_alloc(sizeof(*file));
1525e286361SAndreas Jaekel 		file->guid =
15316a2f000SSimon Klinkert 		    dsl_dataset_phys(zp->z_zfsvfs->z_os->os_dsl_dataset)->ds_guid;
1545e286361SAndreas Jaekel 		file->ino = zp->z_id;
1555e286361SAndreas Jaekel 		file->gen = zp->z_gen;
1565e286361SAndreas Jaekel 		file->refcnt = 0;
1575e286361SAndreas Jaekel 		avl_create(&file->chksums, zev_chksum_cache_cmp,
1585e286361SAndreas Jaekel 		           sizeof(zev_sig_cache_chksums_t),
1595e286361SAndreas Jaekel 		           offsetof(zev_sig_cache_chksums_t, avl_node));
16080d9297cSAndreas Jaekel 		list_insert_head(&zev_sig_cache.lru, file);
1615e286361SAndreas Jaekel 		avl_insert(&zev_sig_cache.files, file, where);
1625e286361SAndreas Jaekel 		zev_sig_cache.cache_size += sizeof(*file);
1635e286361SAndreas Jaekel 	}
1645e286361SAndreas Jaekel 	file->refcnt++;
1655e286361SAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
1665e286361SAndreas Jaekel 	return file;
1675e286361SAndreas Jaekel }
1685e286361SAndreas Jaekel 
1695e286361SAndreas Jaekel static void
1705e286361SAndreas Jaekel zev_chksum_cache_file_release(zev_sig_cache_file_t *file)
1715e286361SAndreas Jaekel {
1725e286361SAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
1735e286361SAndreas Jaekel 
1745e286361SAndreas Jaekel 	/* We don't invalidate/free/destroy *file. Cache expiry does that */
1755e286361SAndreas Jaekel 	file->refcnt--;
1765e286361SAndreas Jaekel 
1775e286361SAndreas Jaekel 	/* Move file to front of lru list */
17880d9297cSAndreas Jaekel 	list_remove(&zev_sig_cache.lru, file);
17980d9297cSAndreas Jaekel 	list_insert_head(&zev_sig_cache.lru, file);
1805e286361SAndreas Jaekel 
1815e286361SAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
1825e286361SAndreas Jaekel }
1835e286361SAndreas Jaekel 
1845e286361SAndreas Jaekel static  zev_sig_cache_chksums_t *
1855e286361SAndreas Jaekel zev_chksum_cache_get_lv1_entry(zev_sig_cache_file_t *file, uint64_t off_l1)
1865e286361SAndreas Jaekel {
1875e286361SAndreas Jaekel 	zev_sig_cache_chksums_t find_chksum;
1885e286361SAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
1895e286361SAndreas Jaekel 	avl_index_t where;
1905e286361SAndreas Jaekel 
19159938026SAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
19259938026SAndreas Jaekel 
1935e286361SAndreas Jaekel 	find_chksum.offset_l1 = off_l1;
1945e286361SAndreas Jaekel 	cs = avl_find(&file->chksums, &find_chksum, &where);
1955e286361SAndreas Jaekel 	if (!cs) {
1965e286361SAndreas Jaekel 		cs = zev_zalloc(sizeof(*cs));
1975e286361SAndreas Jaekel 		cs->offset_l1 = off_l1;
1985e286361SAndreas Jaekel 		avl_insert(&file->chksums, cs, where);
1995e286361SAndreas Jaekel 		zev_sig_cache.cache_size += sizeof(*cs);
2005e286361SAndreas Jaekel 	}
20159938026SAndreas Jaekel 
20259938026SAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
20359938026SAndreas Jaekel 
2045e286361SAndreas Jaekel 	return cs;
2055e286361SAndreas Jaekel }
2065e286361SAndreas Jaekel 
2075e286361SAndreas Jaekel void
2085e286361SAndreas Jaekel zev_chksum_stats(uint64_t *c_size, uint64_t *c_hits, uint64_t *c_misses)
2095e286361SAndreas Jaekel {
2105e286361SAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
2115e286361SAndreas Jaekel 	*c_size = zev_sig_cache.cache_size;
2125e286361SAndreas Jaekel 	*c_hits = zev_sig_cache.hits;
2135e286361SAndreas Jaekel 	*c_misses = zev_sig_cache.misses;
2145e286361SAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
2155e286361SAndreas Jaekel }
2165e286361SAndreas Jaekel 
2175e286361SAndreas Jaekel static void
2185e286361SAndreas Jaekel zev_chksum_cache_invalidate(zev_sig_cache_file_t *file,
2195e286361SAndreas Jaekel                             znode_t *zp,
2205e286361SAndreas Jaekel                             zev_chksum_mode_t mode,
2215e286361SAndreas Jaekel                             uint64_t off,
2225e286361SAndreas Jaekel                             uint64_t len)
2235e286361SAndreas Jaekel {
2245e286361SAndreas Jaekel 	zev_sig_cache_chksums_t find_chksum;
2255e286361SAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
2265e286361SAndreas Jaekel 	int idx;
2275e286361SAndreas Jaekel 	uint64_t off_l1;
2285e286361SAndreas Jaekel 	uint64_t len_l1;
2295e286361SAndreas Jaekel 	uint64_t pos_l0;
2305e286361SAndreas Jaekel 	uint64_t pos_l1;
2315e286361SAndreas Jaekel 
2325e286361SAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
2335e286361SAndreas Jaekel 
2345e286361SAndreas Jaekel 	/* start of this megabyte */
2355e286361SAndreas Jaekel 	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
2365e286361SAndreas Jaekel 
2375e286361SAndreas Jaekel 	if (len == 0) {
2385e286361SAndreas Jaekel 		/* truncate() to EOF */
2395e286361SAndreas Jaekel 		len_l1 = ZEV_L1_SIZE;
2405e286361SAndreas Jaekel 	} else {
2415e286361SAndreas Jaekel 		/* full megabytes */
2425e286361SAndreas Jaekel 		len_l1 = len + (off - off_l1);
2435e286361SAndreas Jaekel 		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
2445e286361SAndreas Jaekel 	}
2455e286361SAndreas Jaekel 
2465e286361SAndreas Jaekel 	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {
2475e286361SAndreas Jaekel 
2485e286361SAndreas Jaekel 		find_chksum.offset_l1 = pos_l1;
2495e286361SAndreas Jaekel 		cs = avl_find(&file->chksums, &find_chksum, NULL);
2505e286361SAndreas Jaekel 		if (!cs)
2515e286361SAndreas Jaekel 			continue;
2525e286361SAndreas Jaekel 
2535e286361SAndreas Jaekel 		for (pos_l0 = MAX(pos_l1, P2ALIGN(off, ZEV_L0_SIZE));
2545e286361SAndreas Jaekel 		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
2555e286361SAndreas Jaekel 		     pos_l0 += ZEV_L0_SIZE){
2565e286361SAndreas Jaekel 
257f96935a8SAndreas Jaekel 			if ((len > 0) && (pos_l0 > (off + len - 1)))
2585e286361SAndreas Jaekel 				break;
2595e286361SAndreas Jaekel 
2605e286361SAndreas Jaekel 			idx = (pos_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
2615e286361SAndreas Jaekel 			memcpy(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH);
2625e286361SAndreas Jaekel 		}
2635e286361SAndreas Jaekel 	}
2645e286361SAndreas Jaekel 
2655e286361SAndreas Jaekel 	if (len == 0) {
2665e286361SAndreas Jaekel 		/* truncate() to EOF -> invalidate all l1 sigs beyond EOF */
2675e286361SAndreas Jaekel 		while ((cs = avl_last(&file->chksums)) != NULL) {
2685e286361SAndreas Jaekel 			if (cs->offset_l1 < zp->z_size)
2695e286361SAndreas Jaekel 				break;
2705e286361SAndreas Jaekel 			avl_remove(&file->chksums, cs);
2715e286361SAndreas Jaekel 			zev_sig_cache.cache_size -= sizeof(*cs);
2725e286361SAndreas Jaekel 			zev_free(cs, sizeof(*cs));
2735e286361SAndreas Jaekel 		}
2745e286361SAndreas Jaekel 	}
2755e286361SAndreas Jaekel 
2765e286361SAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
2775e286361SAndreas Jaekel }
2785e286361SAndreas Jaekel 
2795e286361SAndreas Jaekel static int
2805e286361SAndreas Jaekel zev_chksum_cache_get(uint8_t *dst,
2815e286361SAndreas Jaekel                      zev_sig_cache_file_t *file,
2825e286361SAndreas Jaekel                      zev_sig_cache_chksums_t *cs,
2835e286361SAndreas Jaekel                      uint64_t off_l0)
2845e286361SAndreas Jaekel {
2855e286361SAndreas Jaekel 	int idx;
2865e286361SAndreas Jaekel 
2875e286361SAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
2885e286361SAndreas Jaekel 
2895e286361SAndreas Jaekel 	idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
2905e286361SAndreas Jaekel 	if (!memcmp(cs->sigs[idx], unknown_sig, SHA1_DIGEST_LENGTH)) {
2915e286361SAndreas Jaekel 		zev_sig_cache.misses++;
2925e286361SAndreas Jaekel 		mutex_exit(&zev_sig_cache.mutex);
2935e286361SAndreas Jaekel 		return ENOENT;
2945e286361SAndreas Jaekel 	}
2955e286361SAndreas Jaekel 	memcpy(dst, cs->sigs[idx], SHA1_DIGEST_LENGTH);
2965e286361SAndreas Jaekel 	zev_sig_cache.hits++;
2975e286361SAndreas Jaekel 
2985e286361SAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
2995e286361SAndreas Jaekel 	return 0;
3005e286361SAndreas Jaekel }
3015e286361SAndreas Jaekel 
3025e286361SAndreas Jaekel static void
3035e286361SAndreas Jaekel zev_chksum_cache_put(uint8_t *sig,
3045e286361SAndreas Jaekel                      zev_sig_cache_file_t *file,
3055e286361SAndreas Jaekel                      zev_sig_cache_chksums_t *cs,
3065e286361SAndreas Jaekel                      uint64_t off_l0)
3075e286361SAndreas Jaekel {
3085e286361SAndreas Jaekel 	zev_sig_cache_file_t *f;
309a0e77e28SAndreas Jaekel 	zev_sig_cache_file_t *tmp;
3105e286361SAndreas Jaekel 	int idx;
3115e286361SAndreas Jaekel 
3125e286361SAndreas Jaekel 	mutex_enter(&zev_sig_cache.mutex);
3135e286361SAndreas Jaekel 
3145e286361SAndreas Jaekel 	if (zev_sig_cache.max_cache_size == 0) {
3155e286361SAndreas Jaekel 		/* cache disabled */
3165e286361SAndreas Jaekel 		mutex_exit(&zev_sig_cache.mutex);
3175e286361SAndreas Jaekel 		return;
3185e286361SAndreas Jaekel 	}
3195e286361SAndreas Jaekel 
3205e286361SAndreas Jaekel 	/* expire entries until there's room in the cache */
32180d9297cSAndreas Jaekel 	f = list_tail(&zev_sig_cache.lru);
322a0e77e28SAndreas Jaekel 	while (f && (zev_sig_cache.cache_size > zev_sig_cache.max_cache_size)){
323a0e77e28SAndreas Jaekel 		tmp = f;
32480d9297cSAndreas Jaekel 		f = list_prev(&zev_sig_cache.lru, f);
325a0e77e28SAndreas Jaekel 		if (tmp->refcnt == 0)
326a0e77e28SAndreas Jaekel 			zev_chksum_cache_file_free(tmp);
3275e286361SAndreas Jaekel 	}
3285e286361SAndreas Jaekel 
3295e286361SAndreas Jaekel 	idx = (off_l0 % ZEV_L1_SIZE) / ZEV_L0_SIZE;
3305e286361SAndreas Jaekel 	memcpy(cs->sigs[idx], sig, SHA1_DIGEST_LENGTH);
3315e286361SAndreas Jaekel 
3325e286361SAndreas Jaekel 	mutex_exit(&zev_sig_cache.mutex);
3335e286361SAndreas Jaekel 	return;
3345e286361SAndreas Jaekel }
3355e286361SAndreas Jaekel 
3365e286361SAndreas Jaekel /* verbatim from zfs_vnops.c (unfortunatly it's declared static, there) */
3375e286361SAndreas Jaekel static int
3385e286361SAndreas Jaekel mappedread(vnode_t *vp, int nbytes, uio_t *uio)
3395e286361SAndreas Jaekel {
3405e286361SAndreas Jaekel 	znode_t *zp = VTOZ(vp);
3415e286361SAndreas Jaekel 	objset_t *os = zp->z_zfsvfs->z_os;
3425e286361SAndreas Jaekel 	int64_t	start, off;
3435e286361SAndreas Jaekel 	int len = nbytes;
3445e286361SAndreas Jaekel 	int error = 0;
3455e286361SAndreas Jaekel 
3465e286361SAndreas Jaekel 	start = uio->uio_loffset;
3475e286361SAndreas Jaekel 	off = start & PAGEOFFSET;
3485e286361SAndreas Jaekel 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
3495e286361SAndreas Jaekel 		page_t *pp;
3505e286361SAndreas Jaekel 		uint64_t bytes = MIN(PAGESIZE - off, len);
3515e286361SAndreas Jaekel 
3525e286361SAndreas Jaekel 		if (pp = page_lookup(vp, start, SE_SHARED)) {
3535e286361SAndreas Jaekel 			caddr_t va;
3545e286361SAndreas Jaekel 
3555e286361SAndreas Jaekel 			va = zfs_map_page(pp, S_READ);
3565e286361SAndreas Jaekel 			error = uiomove(va + off, bytes, UIO_READ, uio);
3575e286361SAndreas Jaekel 			zfs_unmap_page(pp, va);
3585e286361SAndreas Jaekel 			page_unlock(pp);
3595e286361SAndreas Jaekel 		} else {
3605e286361SAndreas Jaekel 			error = dmu_read_uio(os, zp->z_id, uio, bytes);
3615e286361SAndreas Jaekel 		}
3625e286361SAndreas Jaekel 		len -= bytes;
3635e286361SAndreas Jaekel 		off = 0;
3645e286361SAndreas Jaekel 		if (error)
3655e286361SAndreas Jaekel 			break;
3665e286361SAndreas Jaekel 	}
3675e286361SAndreas Jaekel 	return (error);
3685e286361SAndreas Jaekel }
3695e286361SAndreas Jaekel 
3705e286361SAndreas Jaekel static int
3715e286361SAndreas Jaekel zev_safe_read(znode_t *zp, char *buf, uint64_t off, uint64_t len)
3725e286361SAndreas Jaekel {
3735e286361SAndreas Jaekel 	uio_t		uio;
3745e286361SAndreas Jaekel 	struct iovec	iov;
3755e286361SAndreas Jaekel 	ssize_t		n;
3765e286361SAndreas Jaekel 	ssize_t		nbytes;
3775e286361SAndreas Jaekel 	int		error = 0;
3785e286361SAndreas Jaekel 	vnode_t		*vp = ZTOV(zp);
3795e286361SAndreas Jaekel 	objset_t	*os = zp->z_zfsvfs->z_os;
3805e286361SAndreas Jaekel 
3815e286361SAndreas Jaekel 	/* set up uio */
3825e286361SAndreas Jaekel 
3835e286361SAndreas Jaekel 	iov.iov_base = buf;
3845e286361SAndreas Jaekel 	iov.iov_len = ZEV_L0_SIZE;
3855e286361SAndreas Jaekel 
3865e286361SAndreas Jaekel 	uio.uio_iov = &iov;
3875e286361SAndreas Jaekel 	uio.uio_iovcnt = 1;
3885e286361SAndreas Jaekel 	uio.uio_segflg = (short)UIO_SYSSPACE;
3895e286361SAndreas Jaekel 	uio.uio_llimit = RLIM64_INFINITY;
3905e286361SAndreas Jaekel 	uio.uio_fmode = FREAD;
3915e286361SAndreas Jaekel 	uio.uio_extflg = UIO_COPY_DEFAULT;
3925e286361SAndreas Jaekel 
3935e286361SAndreas Jaekel 	uio.uio_loffset = off;
3945e286361SAndreas Jaekel 	uio.uio_resid = len;
3955e286361SAndreas Jaekel 
3965e286361SAndreas Jaekel again:
3975e286361SAndreas Jaekel 	if (uio.uio_loffset >= zp->z_size)
3985e286361SAndreas Jaekel 		return EINVAL;
3995e286361SAndreas Jaekel 
4005e286361SAndreas Jaekel 	/* don't read past EOF */
4015e286361SAndreas Jaekel 	n = MIN(uio.uio_resid, zp->z_size - uio.uio_loffset);
4025e286361SAndreas Jaekel 
4035e286361SAndreas Jaekel 	/* this block was essentially copied from zfs_read() in zfs_vnops.c */
4045e286361SAndreas Jaekel 	while (n > 0) {
4055e286361SAndreas Jaekel 		nbytes = MIN(n, zfs_read_chunk_size -
4065e286361SAndreas Jaekel 		    P2PHASE(uio.uio_loffset, zfs_read_chunk_size));
4075e286361SAndreas Jaekel 
4085e286361SAndreas Jaekel 		if (vn_has_cached_data(vp)) {
4095e286361SAndreas Jaekel 			error = mappedread(vp, nbytes, &uio);
4105e286361SAndreas Jaekel 		} else {
4115e286361SAndreas Jaekel 			error = dmu_read_uio(os, zp->z_id, &uio, nbytes);
4125e286361SAndreas Jaekel 		}
4135e286361SAndreas Jaekel 		if (error) {
414df8caf2dSSimon Klinkert 			if (error == EINTR)
4155e286361SAndreas Jaekel 				goto again;
4165e286361SAndreas Jaekel 			/* convert checksum errors into IO errors */
4175e286361SAndreas Jaekel 			if (error == ECKSUM)
4185e286361SAndreas Jaekel 				error = SET_ERROR(EIO);
4195e286361SAndreas Jaekel 			break;
4205e286361SAndreas Jaekel 		}
4215e286361SAndreas Jaekel 
4225e286361SAndreas Jaekel 		n -= nbytes;
4235e286361SAndreas Jaekel 	}
4245e286361SAndreas Jaekel 
4255e286361SAndreas Jaekel 	if (error)
4265e286361SAndreas Jaekel 		return error;
4275e286361SAndreas Jaekel 	return len - uio.uio_resid;
4285e286361SAndreas Jaekel }
4295e286361SAndreas Jaekel 
4305e286361SAndreas Jaekel static void
4315e286361SAndreas Jaekel zev_l0_sig(uint8_t *sig, char *buf)
4325e286361SAndreas Jaekel {
4335e286361SAndreas Jaekel 	SHA1_CTX	ctx;
4345e286361SAndreas Jaekel 
4355e286361SAndreas Jaekel 	SHA1Init(&ctx);
4365e286361SAndreas Jaekel 	SHA1Update(&ctx, buf, ZEV_L0_SIZE);
4375e286361SAndreas Jaekel 	SHA1Final(sig, &ctx);
4385e286361SAndreas Jaekel 	return;
4395e286361SAndreas Jaekel }
4405e286361SAndreas Jaekel 
4415e286361SAndreas Jaekel static void
4425e286361SAndreas Jaekel zev_l0_blocksig(uint8_t *blk_sig, uint8_t *l0_sig, uint8_t block_no)
4435e286361SAndreas Jaekel {
4445e286361SAndreas Jaekel 	SHA1_CTX	ctx;
4455e286361SAndreas Jaekel 
4465e286361SAndreas Jaekel 	SHA1Init(&ctx);
4475e286361SAndreas Jaekel 	SHA1Update(&ctx, l0_sig, SHA1_DIGEST_LENGTH);
4485e286361SAndreas Jaekel 	SHA1Update(&ctx, &block_no, sizeof(block_no));
4495e286361SAndreas Jaekel 	SHA1Final(blk_sig, &ctx);
4505e286361SAndreas Jaekel 	return;
4515e286361SAndreas Jaekel }
4525e286361SAndreas Jaekel 
4535e286361SAndreas Jaekel static void
4545e286361SAndreas Jaekel zev_l1_add(uint8_t *sig_l1, uint8_t *sig_l0)
4555e286361SAndreas Jaekel {
4565e286361SAndreas Jaekel 	int	i;
4575e286361SAndreas Jaekel 	int	s;
4585e286361SAndreas Jaekel 	int	carry = 0;
4595e286361SAndreas Jaekel 
4605e286361SAndreas Jaekel 	for (i = SHA1_DIGEST_LENGTH - 1; i >= 0; --i) {
4615e286361SAndreas Jaekel 		s = sig_l1[i] + sig_l0[i] + carry;
4625e286361SAndreas Jaekel 		carry = s > 255 ? 1 : 0;
4635e286361SAndreas Jaekel 		sig_l1[i] = s & 0xff;
4645e286361SAndreas Jaekel 	}
4655e286361SAndreas Jaekel }
4665e286361SAndreas Jaekel 
467b9710123SAndreas Jaekel static int
4685e286361SAndreas Jaekel zev_get_result_buffer(zev_sig_t **buffer,
4695e286361SAndreas Jaekel                       uint64_t *buffer_len,
470b9710123SAndreas Jaekel                       uint64_t max_buffer_len,
4715e286361SAndreas Jaekel                       znode_t *zp,
4725e286361SAndreas Jaekel                       uint64_t off,
4735e286361SAndreas Jaekel                       uint64_t len,
4745e286361SAndreas Jaekel                       zev_chksum_mode_t mode)
4755e286361SAndreas Jaekel {
4765e286361SAndreas Jaekel 	uint64_t	blk_start;
4775e286361SAndreas Jaekel 	uint64_t	blk_end;
4785e286361SAndreas Jaekel 	uint64_t	l0_blocks;
4795e286361SAndreas Jaekel 	uint64_t	l1_blocks;
4805e286361SAndreas Jaekel 	uint64_t	sigs;
4815e286361SAndreas Jaekel 	int buflen;
4825e286361SAndreas Jaekel 
4835e286361SAndreas Jaekel 	/* calculate result set size: how many checksums will we provide? */
4845e286361SAndreas Jaekel 
4855e286361SAndreas Jaekel 	ASSERT(len > 0 || (mode == zev_truncate && len == 0));
4865e286361SAndreas Jaekel 
4875e286361SAndreas Jaekel 	if (len == 0) {
4885e286361SAndreas Jaekel 		/* truncate */
4895e286361SAndreas Jaekel 		l0_blocks = ((off % ZEV_L0_SIZE) == 0) ? 0 : 1;
4905e286361SAndreas Jaekel 		l1_blocks = ((off % ZEV_L1_SIZE) == 0) ? 0 : 1;
4915e286361SAndreas Jaekel 	} else {
4925e286361SAndreas Jaekel 		/* how many lv1 checksums do we update? */
4935e286361SAndreas Jaekel 		blk_start = off / ZEV_L1_SIZE;
4945e286361SAndreas Jaekel 		blk_end = (off + len - 1) / ZEV_L1_SIZE;
4955e286361SAndreas Jaekel 		l1_blocks = blk_end - blk_start + 1;
4965e286361SAndreas Jaekel 		/* how many lv0 checksums do we update? */
4975e286361SAndreas Jaekel 		blk_start = off / ZEV_L0_SIZE;
4985e286361SAndreas Jaekel 		blk_end = (off + len - 1) / ZEV_L0_SIZE;
4995e286361SAndreas Jaekel 		l0_blocks = blk_end - blk_start + 1;
5005e286361SAndreas Jaekel 	}
5015e286361SAndreas Jaekel 
5025e286361SAndreas Jaekel 	sigs = l1_blocks + l0_blocks;
5035e286361SAndreas Jaekel 	if (sigs == 0) {
5045e286361SAndreas Jaekel 		*buffer = NULL;
5055e286361SAndreas Jaekel 		*buffer_len = 0;
506b9710123SAndreas Jaekel 		return 0;
5075e286361SAndreas Jaekel 	}
5085e286361SAndreas Jaekel 
5095e286361SAndreas Jaekel 	buflen = sigs * sizeof(zev_sig_t);
510b9710123SAndreas Jaekel 	if (max_buffer_len && (buflen > max_buffer_len)) {
511b9710123SAndreas Jaekel 		*buffer = NULL;
512b9710123SAndreas Jaekel 		*buffer_len = 0;
513b9710123SAndreas Jaekel 		return ENOSPC;
514b9710123SAndreas Jaekel 	}
5155e286361SAndreas Jaekel 	*buffer_len = buflen;
5165e286361SAndreas Jaekel 	*buffer = zev_alloc(buflen);
517b9710123SAndreas Jaekel 	return 0;
5185e286361SAndreas Jaekel }
5195e286361SAndreas Jaekel 
5205e286361SAndreas Jaekel static void
5215e286361SAndreas Jaekel zev_append_sig(zev_sig_t *s, int level, uint64_t off, uint8_t *sig)
5225e286361SAndreas Jaekel {
5235e286361SAndreas Jaekel 	s->level = level;
5245e286361SAndreas Jaekel 	s->block_offset = off;
5255e286361SAndreas Jaekel 	memcpy(s->value, sig, SHA1_DIGEST_LENGTH);
5265e286361SAndreas Jaekel }
5275e286361SAndreas Jaekel 
5285e286361SAndreas Jaekel /*
5295e286361SAndreas Jaekel  * Calculate all l0 and l1 checksums that are affected by the given range.
5305e286361SAndreas Jaekel  *
5315e286361SAndreas Jaekel  * This function assumes that the ranges it needs to read are already
5325e286361SAndreas Jaekel  * range-locked.
5335e286361SAndreas Jaekel  */
5345e286361SAndreas Jaekel int
5355e286361SAndreas Jaekel zev_get_checksums(zev_sig_t **result,
5365e286361SAndreas Jaekel                   uint64_t *result_buf_len,
5375e286361SAndreas Jaekel                   uint64_t *signature_cnt,
538b9710123SAndreas Jaekel                   uint64_t max_result_len,
5395e286361SAndreas Jaekel                   znode_t *zp,
5405e286361SAndreas Jaekel                   uint64_t off,
5415e286361SAndreas Jaekel                   uint64_t len,
5425e286361SAndreas Jaekel                   zev_chksum_mode_t mode)
5435e286361SAndreas Jaekel {
5445e286361SAndreas Jaekel 	uint64_t	off_l1;
5455e286361SAndreas Jaekel 	uint64_t	len_l1;
5465e286361SAndreas Jaekel 	uint64_t	pos_l1;
5475e286361SAndreas Jaekel 	uint64_t	pos_l0;
5485e286361SAndreas Jaekel 	char		*buf;
5495e286361SAndreas Jaekel 	int64_t		ret;
5505e286361SAndreas Jaekel 	uint8_t		sig_l0[SHA1_DIGEST_LENGTH];
5515e286361SAndreas Jaekel 	uint8_t		blk_sig_l0[SHA1_DIGEST_LENGTH];
5525e286361SAndreas Jaekel 	uint8_t		sig_l1[SHA1_DIGEST_LENGTH];
5535e286361SAndreas Jaekel 	uint8_t		l0_block_no;
5545e286361SAndreas Jaekel 	zev_sig_t	*sig;
5555e286361SAndreas Jaekel 	int		non_empty_l0_blocks;
5565e286361SAndreas Jaekel 	zev_sig_cache_file_t *file;
5575e286361SAndreas Jaekel 	zev_sig_cache_chksums_t *cs;
5585e286361SAndreas Jaekel 
5595e286361SAndreas Jaekel 	/*
5605e286361SAndreas Jaekel 	 * Note: for write events, the callback is called via
5615e286361SAndreas Jaekel 	 *    zfs_write() -> zfs_log_write() -> zev_znode_write_cb()
5625e286361SAndreas Jaekel 	 *
5635e286361SAndreas Jaekel 	 * The transaction is not commited, yet.
5645e286361SAndreas Jaekel 	 *
5655e286361SAndreas Jaekel 	 * A write() syscall might be split into smaller chunks by zfs_write()
5665e286361SAndreas Jaekel 	 *
5675e286361SAndreas Jaekel 	 * zfs_write() has a range lock when this is called. (zfs_vnops.c:925)
5685e286361SAndreas Jaekel 	 * In zev mode, the range lock will encompass all data we need
5695e286361SAndreas Jaekel 	 * to calculate our checksums.
5705e286361SAndreas Jaekel 	 *
5715e286361SAndreas Jaekel 	 * The same is true for truncates with non-zero length. ("punch hole")
5725e286361SAndreas Jaekel 	 */
5735e286361SAndreas Jaekel 
5745e286361SAndreas Jaekel 	ASSERT(len > 0 || (mode == zev_truncate && len == 0));
5755e286361SAndreas Jaekel 	*signature_cnt = 0;
5765e286361SAndreas Jaekel 
577c2200253SAndreas Jaekel 	/*
578c2200253SAndreas Jaekel 	 * Under certain circumstances we need the first l0 block's
579c2200253SAndreas Jaekel 	 * checksum, because we didn't store it in the database and
580c2200253SAndreas Jaekel 	 * can't easily get it from userspace.  Not for this exact point
581c2200253SAndreas Jaekel 	 * in time, anyway.  So we cheat a little.
582c2200253SAndreas Jaekel 	 */
583c2200253SAndreas Jaekel 	if (mode == zev_truncate && len == 0 && off == 4096) {
584c2200253SAndreas Jaekel 		/*
585c2200253SAndreas Jaekel 		 * Normally, we'd report no checkums:
586c2200253SAndreas Jaekel 		 *  - no l0 sum, because no remaining l0 block is changed
587c2200253SAndreas Jaekel 		 *  - no l1 sum, because the file is now too short for l1 sums
588c2200253SAndreas Jaekel 		 * Let's pretend we changed the first l0 block, then.
589c2200253SAndreas Jaekel 		 * Luckily the entire file is range locked during truncate().
590c2200253SAndreas Jaekel 		 */
591c2200253SAndreas Jaekel 		off = 0;
592c2200253SAndreas Jaekel 		len = 4096;
593c2200253SAndreas Jaekel 	}
594c2200253SAndreas Jaekel 
5955e286361SAndreas Jaekel 	/* start of this megabyte */
5965e286361SAndreas Jaekel 	off_l1 = P2ALIGN(off, ZEV_L1_SIZE);
5975e286361SAndreas Jaekel 	/* full megabytes */
5985e286361SAndreas Jaekel 	if (len == 0) {
5995e286361SAndreas Jaekel 		/* truncate(): we'll look at the last lv1 block, only. */
6005e286361SAndreas Jaekel 		len_l1 = ZEV_L1_SIZE;
6015e286361SAndreas Jaekel 	} else {
6025e286361SAndreas Jaekel 		len_l1 = len + (off - off_l1);
6035e286361SAndreas Jaekel 		len_l1 = P2ROUNDUP(len_l1, ZEV_L1_SIZE);
6045e286361SAndreas Jaekel 	}
6055e286361SAndreas Jaekel 
6065e286361SAndreas Jaekel 	file = zev_chksum_cache_file_get_and_hold(zp);
6075e286361SAndreas Jaekel 	zev_chksum_cache_invalidate(file, zp, mode, off, len);
6085e286361SAndreas Jaekel 	buf = zev_alloc(ZEV_L0_SIZE);
6095e286361SAndreas Jaekel 
610b9710123SAndreas Jaekel 	ret = zev_get_result_buffer(result, result_buf_len, max_result_len,
611b9710123SAndreas Jaekel 	                            zp, off, len, mode);
612b9710123SAndreas Jaekel 	if (ret) {
613b9710123SAndreas Jaekel 		zev_free(buf, ZEV_L0_SIZE);
614b9710123SAndreas Jaekel 		zev_chksum_cache_file_release(file);
615b9710123SAndreas Jaekel 		return ret;
616b9710123SAndreas Jaekel 	}
6175e286361SAndreas Jaekel 	if (*result == NULL) {
6185e286361SAndreas Jaekel 		/* we're done */
6195e286361SAndreas Jaekel 		zev_free(buf, ZEV_L0_SIZE);
6205e286361SAndreas Jaekel 		zev_chksum_cache_file_release(file);
6215e286361SAndreas Jaekel 		return 0;
6225e286361SAndreas Jaekel 	}
6235e286361SAndreas Jaekel 	sig = *result;
6245e286361SAndreas Jaekel 
6255e286361SAndreas Jaekel 	for (pos_l1 = off_l1; pos_l1 < (off_l1+len_l1); pos_l1 += ZEV_L1_SIZE) {
6265e286361SAndreas Jaekel 
6275e286361SAndreas Jaekel 		if (pos_l1 > zp->z_size) {
6285e286361SAndreas Jaekel 			cmn_err(CE_WARN, "zev_get_checksums: off+len beyond "
62986dc7a29SSimon Klinkert 			        "EOF. Unexpected behaviour; please fix! "
63086dc7a29SSimon Klinkert 				"off=%" PRIu64 ", len=%" PRIu64 ", "
63186dc7a29SSimon Klinkert 			        "dataset='%s', inode=%" PRIu64, off, len,
63286dc7a29SSimon Klinkert 				zp->z_zfsvfs->z_os->
633266747efSAndreas Jaekel 			        os_dsl_dataset->ds_dir->dd_myname, zp->z_id);
634266747efSAndreas Jaekel 			zev_free(*result, *result_buf_len);
635266747efSAndreas Jaekel 			*result = NULL;
636*4053e902SSimon Klinkert 			zev_free(buf, ZEV_L0_SIZE);
637*4053e902SSimon Klinkert 			zev_chksum_cache_file_release(file);
638*4053e902SSimon Klinkert 			return EIO;
6395e286361SAndreas Jaekel 		}
6405e286361SAndreas Jaekel 
6415e286361SAndreas Jaekel 		/*
6425e286361SAndreas Jaekel 		 * Since we have a reference to 'file' 'cs' can't be expired.
6435e286361SAndreas Jaekel 		 * Since our ranges are range locked, other threads won't
6445e286361SAndreas Jaekel 		 * touch our checksum entries. (not even read them)
6455e286361SAndreas Jaekel 		 * Hence, we don't need to hold() or release() 'cs'.
6465e286361SAndreas Jaekel 		 */
6475e286361SAndreas Jaekel 		cs = zev_chksum_cache_get_lv1_entry(file, pos_l1);
6485e286361SAndreas Jaekel 
6495e286361SAndreas Jaekel 		l0_block_no = 0;
6505e286361SAndreas Jaekel 		non_empty_l0_blocks = 0;
6515e286361SAndreas Jaekel 		bzero(sig_l1, sizeof(sig_l1));
6525e286361SAndreas Jaekel 		for (pos_l0 = pos_l1;
6535e286361SAndreas Jaekel 		     pos_l0 < (pos_l1 + ZEV_L1_SIZE);
6545e286361SAndreas Jaekel 		     pos_l0 += ZEV_L0_SIZE){
6555e286361SAndreas Jaekel 
6565e286361SAndreas Jaekel 			if (pos_l0 >= zp->z_size)
6575e286361SAndreas Jaekel 				break;	/* EOF */
6585e286361SAndreas Jaekel 
6595e286361SAndreas Jaekel 			if (zev_chksum_cache_get(sig_l0, file,cs,pos_l0) != 0) {
6605e286361SAndreas Jaekel 
6615e286361SAndreas Jaekel 				/* signature is not cached, yet. */
6625e286361SAndreas Jaekel 				ret = zev_safe_read(zp, buf,
6635e286361SAndreas Jaekel 				                    pos_l0, ZEV_L0_SIZE);
6645e286361SAndreas Jaekel 				if (ret < 0) {
6655e286361SAndreas Jaekel 					zev_free(*result, *result_buf_len);
6665e286361SAndreas Jaekel 					zev_free(buf, ZEV_L0_SIZE);
6675e286361SAndreas Jaekel 					zev_chksum_cache_file_release(file);
6685e286361SAndreas Jaekel 					return ret;
6695e286361SAndreas Jaekel 				}
6705e286361SAndreas Jaekel 				/* pad buffer with zeros if necessary */
6715e286361SAndreas Jaekel 				if (ret < ZEV_L0_SIZE)
6725e286361SAndreas Jaekel 					bzero(buf + ret, ZEV_L0_SIZE - ret);
6735e286361SAndreas Jaekel 
6745e286361SAndreas Jaekel 				/* calculate signature */
6755e286361SAndreas Jaekel 				zev_l0_sig(sig_l0, buf);
6765e286361SAndreas Jaekel 
6775e286361SAndreas Jaekel 				zev_chksum_cache_put(sig_l0, file, cs, pos_l0);
6785e286361SAndreas Jaekel 			}
6795e286361SAndreas Jaekel 
6805e286361SAndreas Jaekel 			if (!memcmp(sig_l0, all_zero_sig, SHA1_DIGEST_LENGTH)) {
6815e286361SAndreas Jaekel 				/* all-zero l0 block.  omit signature. */
6825e286361SAndreas Jaekel 				l0_block_no++;
6835e286361SAndreas Jaekel 				continue;
6845e286361SAndreas Jaekel 			}
6855e286361SAndreas Jaekel 			non_empty_l0_blocks++;
6865e286361SAndreas Jaekel 			zev_l0_blocksig(blk_sig_l0, sig_l0, l0_block_no);
6875e286361SAndreas Jaekel 			zev_l1_add(sig_l1, blk_sig_l0);
6885e286361SAndreas Jaekel 
6895e286361SAndreas Jaekel 			if (((pos_l0 + ZEV_L0_SIZE - 1) >= off) &&
690aae4944bSAndreas Jaekel 			    (pos_l0 <= (off + len - 1))) {
6915e286361SAndreas Jaekel 				zev_append_sig(sig++, 0, pos_l0, sig_l0);
6925e286361SAndreas Jaekel 			}
6935e286361SAndreas Jaekel 
6945e286361SAndreas Jaekel 			l0_block_no++;
6955e286361SAndreas Jaekel 		}
6965e286361SAndreas Jaekel 
6975e286361SAndreas Jaekel 		if (non_empty_l0_blocks && (zp->z_size > ZEV_L0_SIZE))
6985e286361SAndreas Jaekel 			zev_append_sig(sig++, 1, pos_l1, sig_l1);
6995e286361SAndreas Jaekel 	}
7005e286361SAndreas Jaekel 
7015e286361SAndreas Jaekel 	*signature_cnt = ((char *)sig - (char *)*result) / sizeof(zev_sig_t);
7025e286361SAndreas Jaekel 
7035e286361SAndreas Jaekel 	zev_free(buf, ZEV_L0_SIZE);
7045e286361SAndreas Jaekel 	zev_chksum_cache_file_release(file);
7055e286361SAndreas Jaekel 	return 0;
7065e286361SAndreas Jaekel }
707b9710123SAndreas Jaekel 
708b9710123SAndreas Jaekel int
709b9710123SAndreas Jaekel zev_ioc_get_signatures(intptr_t arg, int mode)
710b9710123SAndreas Jaekel {
711b9710123SAndreas Jaekel 	zev_ioctl_get_signatures_t gs;
712b9710123SAndreas Jaekel 	file_t *fp;
713b9710123SAndreas Jaekel 	int ret = 0;
714b9710123SAndreas Jaekel 	znode_t *zp;
715b9710123SAndreas Jaekel 	zev_sig_t *sig_buf = NULL;
716b9710123SAndreas Jaekel 	uint64_t sig_buf_len;
717b9710123SAndreas Jaekel 	uint64_t sig_cnt = 0;
718b9710123SAndreas Jaekel 	uint64_t sig_len;
719b9710123SAndreas Jaekel 	char *dst;
720b9710123SAndreas Jaekel 	int range_locked = 0;
721b9710123SAndreas Jaekel 	rl_t *rl;
722b9710123SAndreas Jaekel 	ssize_t	lock_off;
723b9710123SAndreas Jaekel 	ssize_t lock_len;
724a287bd3aSAndreas Jaekel 	struct zfsvfs *zfsvfs = NULL;
725b9710123SAndreas Jaekel 
726b9710123SAndreas Jaekel 	if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
727b9710123SAndreas Jaekel 		return EFAULT;
728b9710123SAndreas Jaekel 	fp = getf(gs.zev_fd);
729b9710123SAndreas Jaekel 	if (fp == NULL)
730b9710123SAndreas Jaekel 		return EBADF;
731b9710123SAndreas Jaekel 	if (fp->f_vnode->v_vfsp->vfs_fstype != zfsfstype) {
732b9710123SAndreas Jaekel 		ret = EINVAL;
733b9710123SAndreas Jaekel 		goto out;
734b9710123SAndreas Jaekel 	}
735a287bd3aSAndreas Jaekel 	zp = VTOZ(fp->f_vnode);
736a287bd3aSAndreas Jaekel 
737a287bd3aSAndreas Jaekel 	/* modified version of ZFS_ENTER() macro - we need to clean up fp */
738a287bd3aSAndreas Jaekel 	zfsvfs = zp->z_zfsvfs;
739a287bd3aSAndreas Jaekel 	rrm_enter_read(&zfsvfs->z_teardown_lock, FTAG);
740a287bd3aSAndreas Jaekel 	if (zp->z_zfsvfs->z_unmounted) {
741a287bd3aSAndreas Jaekel 		ret = EIO;
742a287bd3aSAndreas Jaekel 		goto out;
743a287bd3aSAndreas Jaekel 	}
744a287bd3aSAndreas Jaekel 	/* modified version of ZFS_VERIFY_ZP() macro */
745a287bd3aSAndreas Jaekel 	if (zp->z_sa_hdl == NULL) {
746a287bd3aSAndreas Jaekel 		ret = EIO;
747a287bd3aSAndreas Jaekel 		goto out;
748a287bd3aSAndreas Jaekel 	}
749a287bd3aSAndreas Jaekel 
750b9710123SAndreas Jaekel 	if (fp->f_vnode->v_type != VREG) {
751b9710123SAndreas Jaekel 		ret = EINVAL;
752b9710123SAndreas Jaekel 		goto out;
753b9710123SAndreas Jaekel 	}
754b9710123SAndreas Jaekel 	if (gs.zev_offset >= zp->z_size) {
755b9710123SAndreas Jaekel 		ret = EINVAL;
756b9710123SAndreas Jaekel 		goto out;
757b9710123SAndreas Jaekel 	}
758b9710123SAndreas Jaekel 
759b9710123SAndreas Jaekel 	/* range lock data */
760b9710123SAndreas Jaekel 	lock_off = P2ALIGN(gs.zev_offset, ZEV_L1_SIZE);
761b9710123SAndreas Jaekel 	lock_len = gs.zev_len + (gs.zev_offset - lock_off);
762b9710123SAndreas Jaekel 	lock_len = P2ROUNDUP(lock_len, ZEV_L1_SIZE);
763b9710123SAndreas Jaekel 	rl = zfs_range_lock(zp, lock_off, lock_len, RL_READER);
764b9710123SAndreas Jaekel 	range_locked = 1;
765b9710123SAndreas Jaekel 
766b9710123SAndreas Jaekel 	/* get checksums */
767b9710123SAndreas Jaekel 	ret = zev_get_checksums(&sig_buf, &sig_buf_len, &sig_cnt,
768b9710123SAndreas Jaekel 	                        gs.zev_bufsize,
769b9710123SAndreas Jaekel 	                        zp, gs.zev_offset, gs.zev_len, zev_write);
770b9710123SAndreas Jaekel 	if (ret)
771b9710123SAndreas Jaekel 		goto out;
772b9710123SAndreas Jaekel 
773b9710123SAndreas Jaekel 	/* copy to userland */
774b9710123SAndreas Jaekel 	sig_len = sig_cnt * sizeof(zev_sig_t);
775b9710123SAndreas Jaekel 	gs.zev_signature_cnt = sig_cnt;
776b9710123SAndreas Jaekel 	if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) {
777b9710123SAndreas Jaekel 		ret = EFAULT;
778b9710123SAndreas Jaekel 		goto out;
779b9710123SAndreas Jaekel 	}
780b9710123SAndreas Jaekel 	if (sig_cnt && sig_buf) {
781b9710123SAndreas Jaekel 		dst = (char *)arg + sizeof(gs);
782b9710123SAndreas Jaekel 		if (ddi_copyout(sig_buf, (void *)dst, sig_len, mode) != 0) {
783b9710123SAndreas Jaekel 			ret = EFAULT;
784b9710123SAndreas Jaekel 			goto out;
785b9710123SAndreas Jaekel 		}
786b9710123SAndreas Jaekel 	}
787b9710123SAndreas Jaekel out:
788b9710123SAndreas Jaekel 	if (sig_buf)
789b9710123SAndreas Jaekel 		zev_free(sig_buf, sig_buf_len);
790b9710123SAndreas Jaekel 	if (range_locked)
791b9710123SAndreas Jaekel 		zfs_range_unlock(rl);
792a287bd3aSAndreas Jaekel 	if (zfsvfs)
793a287bd3aSAndreas Jaekel 		ZFS_EXIT(zfsvfs);
794b9710123SAndreas Jaekel 	releasef(gs.zev_fd);
795b9710123SAndreas Jaekel 	return ret;
796b9710123SAndreas Jaekel }
797b9710123SAndreas Jaekel 
7981ca5a13bSAndreas Jaekel void
7991ca5a13bSAndreas Jaekel zev_symlink_checksum(zev_znode_symlink_t *rec, char *link)
8001ca5a13bSAndreas Jaekel {
8011ca5a13bSAndreas Jaekel 	char buf[ZEV_L0_SIZE];
8021ca5a13bSAndreas Jaekel 
8031ca5a13bSAndreas Jaekel 	memset(buf, 0, sizeof(buf));
8041ca5a13bSAndreas Jaekel 	strcpy(buf, link);
8051ca5a13bSAndreas Jaekel 	zev_l0_sig(rec->signature.value, buf);
8061ca5a13bSAndreas Jaekel 	rec->signature.level = 0;
8071ca5a13bSAndreas Jaekel 	rec->signature.block_offset = 0;
8081ca5a13bSAndreas Jaekel }
8091ca5a13bSAndreas Jaekel 
8101ca5a13bSAndreas Jaekel 
8111ca5a13bSAndreas Jaekel void
8121ca5a13bSAndreas Jaekel zev_create_checksum(zev_znode_create_t *rec, znode_t *zp)
8131ca5a13bSAndreas Jaekel {
8141ca5a13bSAndreas Jaekel 	char buf[ZEV_L0_SIZE];
8151ca5a13bSAndreas Jaekel 	vnode_t *vp;
8161ca5a13bSAndreas Jaekel 	uint64_t rdev;
8171ca5a13bSAndreas Jaekel 
8181ca5a13bSAndreas Jaekel 	vp = ZTOV(zp);
8191ca5a13bSAndreas Jaekel 	if (vp->v_type == VBLK || vp->v_type == VCHR) {
8201ca5a13bSAndreas Jaekel 		sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zp->z_zfsvfs),
8211ca5a13bSAndreas Jaekel 		          &rdev, sizeof(rdev));
8221ca5a13bSAndreas Jaekel 		memset(buf, 0, sizeof(buf));
8231ca5a13bSAndreas Jaekel 		snprintf(buf, sizeof(buf), "%c%d,%d",
8241ca5a13bSAndreas Jaekel 		         vp->v_type == VBLK ? 'b' : 'c',
8251ca5a13bSAndreas Jaekel 		         getmajor(rdev),
8261ca5a13bSAndreas Jaekel 		         getminor(rdev));
8271ca5a13bSAndreas Jaekel 		zev_l0_sig(rec->signature.value, buf);
8281ca5a13bSAndreas Jaekel 	} else {
8291ca5a13bSAndreas Jaekel 		memset(rec->signature.value, 0, sizeof(rec->signature.value));
8301ca5a13bSAndreas Jaekel 	}
8311ca5a13bSAndreas Jaekel 	rec->signature.level = 0;
8321ca5a13bSAndreas Jaekel 	rec->signature.block_offset = 0;
8331ca5a13bSAndreas Jaekel }
8341ca5a13bSAndreas Jaekel 
835