xref: /linux/tools/testing/selftests/mm/memory-failure.c (revision 12e8a2fae372c55c17a410929cfa60f96b93d17a)
1ff4ef2fbSMiaohe Lin // SPDX-License-Identifier: GPL-2.0
2ff4ef2fbSMiaohe Lin /*
3ff4ef2fbSMiaohe Lin  * Memory-failure functional tests.
4ff4ef2fbSMiaohe Lin  *
5ff4ef2fbSMiaohe Lin  * Author(s): Miaohe Lin <linmiaohe@huawei.com>
6ff4ef2fbSMiaohe Lin  */
7ff4ef2fbSMiaohe Lin 
8ff4ef2fbSMiaohe Lin #include "../kselftest_harness.h"
9ff4ef2fbSMiaohe Lin 
10ff4ef2fbSMiaohe Lin #include <sys/mman.h>
11ff4ef2fbSMiaohe Lin #include <linux/mman.h>
12ff4ef2fbSMiaohe Lin #include <linux/string.h>
13*12e8a2faSMiaohe Lin #include <unistd.h>
14ff4ef2fbSMiaohe Lin #include <signal.h>
15ff4ef2fbSMiaohe Lin #include <setjmp.h>
16ff4ef2fbSMiaohe Lin #include <unistd.h>
17ff4ef2fbSMiaohe Lin #include <fcntl.h>
18*12e8a2faSMiaohe Lin #include <sys/vfs.h>
19*12e8a2faSMiaohe Lin #include <linux/magic.h>
20*12e8a2faSMiaohe Lin #include <errno.h>
21ff4ef2fbSMiaohe Lin 
22ff4ef2fbSMiaohe Lin #include "vm_util.h"
23ff4ef2fbSMiaohe Lin 
/* How a fixture variant injects the memory failure. */
enum inject_type {
	MADV_HARD,	/* used by the madv_hard variant (madvise MADV_HWPOISON) */
	MADV_SOFT,	/* used by the madv_soft variant (madvise MADV_SOFT_OFFLINE) */
};
28ff4ef2fbSMiaohe Lin 
/*
 * Expected-outcome selector handed to check(): one value per combination of
 * injection method (hard/soft) and kind of page under test.
 */
enum result_type {
	/* Hard injection on an anonymous page. */
	MADV_HARD_ANON,
	/* Hard injection on a clean page-cache page. */
	MADV_HARD_CLEAN_PAGECACHE,
	/* Soft injection on an anonymous page. */
	MADV_SOFT_ANON,
	/* Soft injection on a clean page-cache page. */
	MADV_SOFT_CLEAN_PAGECACHE,
};
35ff4ef2fbSMiaohe Lin 
36ff4ef2fbSMiaohe Lin static jmp_buf signal_jmp_buf;
37ff4ef2fbSMiaohe Lin static siginfo_t siginfo;
38ff4ef2fbSMiaohe Lin const char *pagemap_proc = "/proc/self/pagemap";
39ff4ef2fbSMiaohe Lin const char *kpageflags_proc = "/proc/kpageflags";
40ff4ef2fbSMiaohe Lin 
/* Per-test state shared by setup, the test bodies and the helpers. */
FIXTURE(memory_failure)
{
	/* System page size, read once in setup via sysconf(_SC_PAGESIZE). */
	unsigned long page_size;
	/* HardwareCorrupted value sampled by prepare() before injection. */
	unsigned long corrupted_size;
	/* PFN backing the page under test, looked up by prepare(). */
	unsigned long pfn;
	/* Descriptor for /proc/self/pagemap. */
	int pagemap_fd;
	/* Descriptor for /proc/kpageflags. */
	int kpageflags_fd;
	/* Set once the failure has been injected, so it is not injected twice. */
	bool triggered;
};
50ff4ef2fbSMiaohe Lin 
51ff4ef2fbSMiaohe Lin FIXTURE_VARIANT(memory_failure)
52ff4ef2fbSMiaohe Lin {
53ff4ef2fbSMiaohe Lin 	enum inject_type type;
54ff4ef2fbSMiaohe Lin 	int (*inject)(FIXTURE_DATA(memory_failure) * self, void *vaddr);
55ff4ef2fbSMiaohe Lin };
56ff4ef2fbSMiaohe Lin 
57ff4ef2fbSMiaohe Lin static int madv_hard_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr)
58ff4ef2fbSMiaohe Lin {
59ff4ef2fbSMiaohe Lin 	return madvise(vaddr, self->page_size, MADV_HWPOISON);
60ff4ef2fbSMiaohe Lin }
61ff4ef2fbSMiaohe Lin 
62ff4ef2fbSMiaohe Lin FIXTURE_VARIANT_ADD(memory_failure, madv_hard)
63ff4ef2fbSMiaohe Lin {
64ff4ef2fbSMiaohe Lin 	.type = MADV_HARD,
65ff4ef2fbSMiaohe Lin 	.inject = madv_hard_inject,
66ff4ef2fbSMiaohe Lin };
67ff4ef2fbSMiaohe Lin 
68ff4ef2fbSMiaohe Lin static int madv_soft_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr)
69ff4ef2fbSMiaohe Lin {
70ff4ef2fbSMiaohe Lin 	return madvise(vaddr, self->page_size, MADV_SOFT_OFFLINE);
71ff4ef2fbSMiaohe Lin }
72ff4ef2fbSMiaohe Lin 
73ff4ef2fbSMiaohe Lin FIXTURE_VARIANT_ADD(memory_failure, madv_soft)
74ff4ef2fbSMiaohe Lin {
75ff4ef2fbSMiaohe Lin 	.type = MADV_SOFT,
76ff4ef2fbSMiaohe Lin 	.inject = madv_soft_inject,
77ff4ef2fbSMiaohe Lin };
78ff4ef2fbSMiaohe Lin 
79ff4ef2fbSMiaohe Lin static void sigbus_action(int signo, siginfo_t *si, void *args)
80ff4ef2fbSMiaohe Lin {
81ff4ef2fbSMiaohe Lin 	memcpy(&siginfo, si, sizeof(siginfo_t));
82ff4ef2fbSMiaohe Lin 	siglongjmp(signal_jmp_buf, 1);
83ff4ef2fbSMiaohe Lin }
84ff4ef2fbSMiaohe Lin 
85ff4ef2fbSMiaohe Lin static int setup_sighandler(void)
86ff4ef2fbSMiaohe Lin {
87ff4ef2fbSMiaohe Lin 	struct sigaction sa = {
88ff4ef2fbSMiaohe Lin 		.sa_sigaction = sigbus_action,
89ff4ef2fbSMiaohe Lin 		.sa_flags = SA_SIGINFO,
90ff4ef2fbSMiaohe Lin 	};
91ff4ef2fbSMiaohe Lin 
92ff4ef2fbSMiaohe Lin 	return sigaction(SIGBUS, &sa, NULL);
93ff4ef2fbSMiaohe Lin }
94ff4ef2fbSMiaohe Lin 
95ff4ef2fbSMiaohe Lin FIXTURE_SETUP(memory_failure)
96ff4ef2fbSMiaohe Lin {
97ff4ef2fbSMiaohe Lin 	memset(self, 0, sizeof(*self));
98ff4ef2fbSMiaohe Lin 
99ff4ef2fbSMiaohe Lin 	self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
100ff4ef2fbSMiaohe Lin 
101ff4ef2fbSMiaohe Lin 	memset(&siginfo, 0, sizeof(siginfo));
102ff4ef2fbSMiaohe Lin 	if (setup_sighandler())
103ff4ef2fbSMiaohe Lin 		SKIP(return, "setup sighandler failed.\n");
104ff4ef2fbSMiaohe Lin 
105ff4ef2fbSMiaohe Lin 	self->pagemap_fd = open(pagemap_proc, O_RDONLY);
106ff4ef2fbSMiaohe Lin 	if (self->pagemap_fd == -1)
107ff4ef2fbSMiaohe Lin 		SKIP(return, "open %s failed.\n", pagemap_proc);
108ff4ef2fbSMiaohe Lin 
109ff4ef2fbSMiaohe Lin 	self->kpageflags_fd = open(kpageflags_proc, O_RDONLY);
110ff4ef2fbSMiaohe Lin 	if (self->kpageflags_fd == -1)
111ff4ef2fbSMiaohe Lin 		SKIP(return, "open %s failed.\n", kpageflags_proc);
112ff4ef2fbSMiaohe Lin }
113ff4ef2fbSMiaohe Lin 
114ff4ef2fbSMiaohe Lin static void teardown_sighandler(void)
115ff4ef2fbSMiaohe Lin {
116ff4ef2fbSMiaohe Lin 	struct sigaction sa = {
117ff4ef2fbSMiaohe Lin 		.sa_handler = SIG_DFL,
118ff4ef2fbSMiaohe Lin 		.sa_flags = SA_SIGINFO,
119ff4ef2fbSMiaohe Lin 	};
120ff4ef2fbSMiaohe Lin 
121ff4ef2fbSMiaohe Lin 	sigaction(SIGBUS, &sa, NULL);
122ff4ef2fbSMiaohe Lin }
123ff4ef2fbSMiaohe Lin 
124ff4ef2fbSMiaohe Lin FIXTURE_TEARDOWN(memory_failure)
125ff4ef2fbSMiaohe Lin {
126ff4ef2fbSMiaohe Lin 	close(self->kpageflags_fd);
127ff4ef2fbSMiaohe Lin 	close(self->pagemap_fd);
128ff4ef2fbSMiaohe Lin 	teardown_sighandler();
129ff4ef2fbSMiaohe Lin }
130ff4ef2fbSMiaohe Lin 
131ff4ef2fbSMiaohe Lin static void prepare(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
132ff4ef2fbSMiaohe Lin 		    void *vaddr)
133ff4ef2fbSMiaohe Lin {
134ff4ef2fbSMiaohe Lin 	self->pfn = pagemap_get_pfn(self->pagemap_fd, vaddr);
135ff4ef2fbSMiaohe Lin 	ASSERT_NE(self->pfn, -1UL);
136ff4ef2fbSMiaohe Lin 
137ff4ef2fbSMiaohe Lin 	ASSERT_EQ(get_hardware_corrupted_size(&self->corrupted_size), 0);
138ff4ef2fbSMiaohe Lin }
139ff4ef2fbSMiaohe Lin 
/*
 * Verify that @size bytes starting at @vaddr all hold the 0xce fill pattern.
 *
 * The region is compared in chunks of at most sizeof(pattern) bytes. A final
 * chunk shorter than the pattern buffer is compared too, so @size does not
 * have to be a multiple of the chunk size.
 *
 * Return: true if every byte matches (trivially true for @size == 0),
 * false otherwise.
 */
static bool check_memory(const void *vaddr, unsigned long size)
{
	/* Byte pointer: avoids relying on GNU arithmetic on void pointers. */
	const unsigned char *cur = vaddr;
	unsigned char pattern[64];

	memset(pattern, 0xce, sizeof(pattern));
	while (size) {
		unsigned long chunk = size < sizeof(pattern) ? size : sizeof(pattern);

		if (memcmp(cur, pattern, chunk))
			return false;
		cur += chunk;
		size -= chunk;
	}

	return true;
}
154ff4ef2fbSMiaohe Lin 
155ff4ef2fbSMiaohe Lin static void check(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
156ff4ef2fbSMiaohe Lin 		  void *vaddr, enum result_type type, int setjmp)
157ff4ef2fbSMiaohe Lin {
158ff4ef2fbSMiaohe Lin 	unsigned long size;
159ff4ef2fbSMiaohe Lin 	uint64_t pfn_flags;
160ff4ef2fbSMiaohe Lin 
161ff4ef2fbSMiaohe Lin 	switch (type) {
162ff4ef2fbSMiaohe Lin 	case MADV_SOFT_ANON:
163*12e8a2faSMiaohe Lin 	case MADV_HARD_CLEAN_PAGECACHE:
164*12e8a2faSMiaohe Lin 	case MADV_SOFT_CLEAN_PAGECACHE:
165ff4ef2fbSMiaohe Lin 		/* It is not expected to receive a SIGBUS signal. */
166ff4ef2fbSMiaohe Lin 		ASSERT_EQ(setjmp, 0);
167ff4ef2fbSMiaohe Lin 
168ff4ef2fbSMiaohe Lin 		/* The page content should remain unchanged. */
169ff4ef2fbSMiaohe Lin 		ASSERT_TRUE(check_memory(vaddr, self->page_size));
170ff4ef2fbSMiaohe Lin 
171ff4ef2fbSMiaohe Lin 		/* The backing pfn of addr should have changed. */
172ff4ef2fbSMiaohe Lin 		ASSERT_NE(pagemap_get_pfn(self->pagemap_fd, vaddr), self->pfn);
173ff4ef2fbSMiaohe Lin 		break;
174ff4ef2fbSMiaohe Lin 	case MADV_HARD_ANON:
175ff4ef2fbSMiaohe Lin 		/* The SIGBUS signal should have been received. */
176ff4ef2fbSMiaohe Lin 		ASSERT_EQ(setjmp, 1);
177ff4ef2fbSMiaohe Lin 
178ff4ef2fbSMiaohe Lin 		/* Check if siginfo contains correct SIGBUS context. */
179ff4ef2fbSMiaohe Lin 		ASSERT_EQ(siginfo.si_signo, SIGBUS);
180ff4ef2fbSMiaohe Lin 		ASSERT_EQ(siginfo.si_code, BUS_MCEERR_AR);
181ff4ef2fbSMiaohe Lin 		ASSERT_EQ(1UL << siginfo.si_addr_lsb, self->page_size);
182ff4ef2fbSMiaohe Lin 		ASSERT_EQ(siginfo.si_addr, vaddr);
183ff4ef2fbSMiaohe Lin 
184ff4ef2fbSMiaohe Lin 		/* XXX Check backing pte is hwpoison entry when supported. */
185ff4ef2fbSMiaohe Lin 		ASSERT_TRUE(pagemap_is_swapped(self->pagemap_fd, vaddr));
186ff4ef2fbSMiaohe Lin 		break;
187ff4ef2fbSMiaohe Lin 	default:
188ff4ef2fbSMiaohe Lin 		SKIP(return, "unexpected inject type %d.\n", type);
189ff4ef2fbSMiaohe Lin 	}
190ff4ef2fbSMiaohe Lin 
191ff4ef2fbSMiaohe Lin 	/* Check if the value of HardwareCorrupted has increased. */
192ff4ef2fbSMiaohe Lin 	ASSERT_EQ(get_hardware_corrupted_size(&size), 0);
193ff4ef2fbSMiaohe Lin 	ASSERT_EQ(size, self->corrupted_size + self->page_size / 1024);
194ff4ef2fbSMiaohe Lin 
195ff4ef2fbSMiaohe Lin 	/* Check if HWPoison flag is set. */
196ff4ef2fbSMiaohe Lin 	ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0);
197ff4ef2fbSMiaohe Lin 	ASSERT_EQ(pfn_flags & KPF_HWPOISON, KPF_HWPOISON);
198ff4ef2fbSMiaohe Lin }
199ff4ef2fbSMiaohe Lin 
200ff4ef2fbSMiaohe Lin static void cleanup(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
201ff4ef2fbSMiaohe Lin 		    void *vaddr)
202ff4ef2fbSMiaohe Lin {
203ff4ef2fbSMiaohe Lin 	unsigned long size;
204ff4ef2fbSMiaohe Lin 	uint64_t pfn_flags;
205ff4ef2fbSMiaohe Lin 
206ff4ef2fbSMiaohe Lin 	ASSERT_EQ(unpoison_memory(self->pfn), 0);
207ff4ef2fbSMiaohe Lin 
208ff4ef2fbSMiaohe Lin 	/* Check if HWPoison flag is cleared. */
209ff4ef2fbSMiaohe Lin 	ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0);
210ff4ef2fbSMiaohe Lin 	ASSERT_NE(pfn_flags & KPF_HWPOISON, KPF_HWPOISON);
211ff4ef2fbSMiaohe Lin 
212ff4ef2fbSMiaohe Lin 	/* Check if the value of HardwareCorrupted has decreased. */
213ff4ef2fbSMiaohe Lin 	ASSERT_EQ(get_hardware_corrupted_size(&size), 0);
214ff4ef2fbSMiaohe Lin 	ASSERT_EQ(size, self->corrupted_size);
215ff4ef2fbSMiaohe Lin }
216ff4ef2fbSMiaohe Lin 
217ff4ef2fbSMiaohe Lin TEST_F(memory_failure, anon)
218ff4ef2fbSMiaohe Lin {
219ff4ef2fbSMiaohe Lin 	char *addr;
220ff4ef2fbSMiaohe Lin 	int ret;
221ff4ef2fbSMiaohe Lin 
222ff4ef2fbSMiaohe Lin 	addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
223ff4ef2fbSMiaohe Lin 		    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
224ff4ef2fbSMiaohe Lin 	if (addr == MAP_FAILED)
225ff4ef2fbSMiaohe Lin 		SKIP(return, "mmap failed, not enough memory.\n");
226ff4ef2fbSMiaohe Lin 	memset(addr, 0xce, self->page_size);
227ff4ef2fbSMiaohe Lin 
228ff4ef2fbSMiaohe Lin 	prepare(_metadata, self, addr);
229ff4ef2fbSMiaohe Lin 
230ff4ef2fbSMiaohe Lin 	ret = sigsetjmp(signal_jmp_buf, 1);
231ff4ef2fbSMiaohe Lin 	if (!self->triggered) {
232ff4ef2fbSMiaohe Lin 		self->triggered = true;
233ff4ef2fbSMiaohe Lin 		ASSERT_EQ(variant->inject(self, addr), 0);
234ff4ef2fbSMiaohe Lin 		FORCE_READ(*addr);
235ff4ef2fbSMiaohe Lin 	}
236ff4ef2fbSMiaohe Lin 
237ff4ef2fbSMiaohe Lin 	if (variant->type == MADV_HARD)
238ff4ef2fbSMiaohe Lin 		check(_metadata, self, addr, MADV_HARD_ANON, ret);
239ff4ef2fbSMiaohe Lin 	else
240ff4ef2fbSMiaohe Lin 		check(_metadata, self, addr, MADV_SOFT_ANON, ret);
241ff4ef2fbSMiaohe Lin 
242ff4ef2fbSMiaohe Lin 	cleanup(_metadata, self, addr);
243ff4ef2fbSMiaohe Lin 
244ff4ef2fbSMiaohe Lin 	ASSERT_EQ(munmap(addr, self->page_size), 0);
245ff4ef2fbSMiaohe Lin }
246ff4ef2fbSMiaohe Lin 
/*
 * Return the filesystem magic (statfs f_type, truncated to int) of the
 * filesystem that @fd lives on, or 0 if fstatfs() fails. Calls interrupted
 * by a signal (EINTR) are retried.
 *
 * Borrowed from mm/gup_longterm.c.
 */
static int get_fs_type(int fd)
{
	struct statfs fs;

	for (;;) {
		if (!fstatfs(fd, &fs))
			return (int)fs.f_type;
		if (errno != EINTR)
			return 0;
	}
}
259*12e8a2faSMiaohe Lin 
260*12e8a2faSMiaohe Lin TEST_F(memory_failure, clean_pagecache)
261*12e8a2faSMiaohe Lin {
262*12e8a2faSMiaohe Lin 	const char *fname = "./clean-page-cache-test-file";
263*12e8a2faSMiaohe Lin 	int fd;
264*12e8a2faSMiaohe Lin 	char *addr;
265*12e8a2faSMiaohe Lin 	int ret;
266*12e8a2faSMiaohe Lin 	int fs_type;
267*12e8a2faSMiaohe Lin 
268*12e8a2faSMiaohe Lin 	fd = open(fname, O_RDWR | O_CREAT, 0664);
269*12e8a2faSMiaohe Lin 	if (fd < 0)
270*12e8a2faSMiaohe Lin 		SKIP(return, "failed to open test file.\n");
271*12e8a2faSMiaohe Lin 	unlink(fname);
272*12e8a2faSMiaohe Lin 	ftruncate(fd, self->page_size);
273*12e8a2faSMiaohe Lin 	fs_type = get_fs_type(fd);
274*12e8a2faSMiaohe Lin 	if (!fs_type || fs_type == TMPFS_MAGIC)
275*12e8a2faSMiaohe Lin 		SKIP(return, "unsupported filesystem :%x\n", fs_type);
276*12e8a2faSMiaohe Lin 
277*12e8a2faSMiaohe Lin 	addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
278*12e8a2faSMiaohe Lin 		    MAP_SHARED, fd, 0);
279*12e8a2faSMiaohe Lin 	if (addr == MAP_FAILED)
280*12e8a2faSMiaohe Lin 		SKIP(return, "mmap failed, not enough memory.\n");
281*12e8a2faSMiaohe Lin 	memset(addr, 0xce, self->page_size);
282*12e8a2faSMiaohe Lin 	fsync(fd);
283*12e8a2faSMiaohe Lin 
284*12e8a2faSMiaohe Lin 	prepare(_metadata, self, addr);
285*12e8a2faSMiaohe Lin 
286*12e8a2faSMiaohe Lin 	ret = sigsetjmp(signal_jmp_buf, 1);
287*12e8a2faSMiaohe Lin 	if (!self->triggered) {
288*12e8a2faSMiaohe Lin 		self->triggered = true;
289*12e8a2faSMiaohe Lin 		ASSERT_EQ(variant->inject(self, addr), 0);
290*12e8a2faSMiaohe Lin 		FORCE_READ(*addr);
291*12e8a2faSMiaohe Lin 	}
292*12e8a2faSMiaohe Lin 
293*12e8a2faSMiaohe Lin 	if (variant->type == MADV_HARD)
294*12e8a2faSMiaohe Lin 		check(_metadata, self, addr, MADV_HARD_CLEAN_PAGECACHE, ret);
295*12e8a2faSMiaohe Lin 	else
296*12e8a2faSMiaohe Lin 		check(_metadata, self, addr, MADV_SOFT_CLEAN_PAGECACHE, ret);
297*12e8a2faSMiaohe Lin 
298*12e8a2faSMiaohe Lin 	cleanup(_metadata, self, addr);
299*12e8a2faSMiaohe Lin 
300*12e8a2faSMiaohe Lin 	ASSERT_EQ(munmap(addr, self->page_size), 0);
301*12e8a2faSMiaohe Lin 
302*12e8a2faSMiaohe Lin 	ASSERT_EQ(close(fd), 0);
303*12e8a2faSMiaohe Lin }
304*12e8a2faSMiaohe Lin 
305ff4ef2fbSMiaohe Lin TEST_HARNESS_MAIN
306