1ff4ef2fbSMiaohe Lin // SPDX-License-Identifier: GPL-2.0 2ff4ef2fbSMiaohe Lin /* 3ff4ef2fbSMiaohe Lin * Memory-failure functional tests. 4ff4ef2fbSMiaohe Lin * 5ff4ef2fbSMiaohe Lin * Author(s): Miaohe Lin <linmiaohe@huawei.com> 6ff4ef2fbSMiaohe Lin */ 7ff4ef2fbSMiaohe Lin 8ff4ef2fbSMiaohe Lin #include "../kselftest_harness.h" 9ff4ef2fbSMiaohe Lin 10ff4ef2fbSMiaohe Lin #include <sys/mman.h> 11ff4ef2fbSMiaohe Lin #include <linux/mman.h> 12ff4ef2fbSMiaohe Lin #include <linux/string.h> 13*12e8a2faSMiaohe Lin #include <unistd.h> 14ff4ef2fbSMiaohe Lin #include <signal.h> 15ff4ef2fbSMiaohe Lin #include <setjmp.h> 16ff4ef2fbSMiaohe Lin #include <unistd.h> 17ff4ef2fbSMiaohe Lin #include <fcntl.h> 18*12e8a2faSMiaohe Lin #include <sys/vfs.h> 19*12e8a2faSMiaohe Lin #include <linux/magic.h> 20*12e8a2faSMiaohe Lin #include <errno.h> 21ff4ef2fbSMiaohe Lin 22ff4ef2fbSMiaohe Lin #include "vm_util.h" 23ff4ef2fbSMiaohe Lin 24ff4ef2fbSMiaohe Lin enum inject_type { 25ff4ef2fbSMiaohe Lin MADV_HARD, 26ff4ef2fbSMiaohe Lin MADV_SOFT, 27ff4ef2fbSMiaohe Lin }; 28ff4ef2fbSMiaohe Lin 29ff4ef2fbSMiaohe Lin enum result_type { 30ff4ef2fbSMiaohe Lin MADV_HARD_ANON, 31*12e8a2faSMiaohe Lin MADV_HARD_CLEAN_PAGECACHE, 32ff4ef2fbSMiaohe Lin MADV_SOFT_ANON, 33*12e8a2faSMiaohe Lin MADV_SOFT_CLEAN_PAGECACHE, 34ff4ef2fbSMiaohe Lin }; 35ff4ef2fbSMiaohe Lin 36ff4ef2fbSMiaohe Lin static jmp_buf signal_jmp_buf; 37ff4ef2fbSMiaohe Lin static siginfo_t siginfo; 38ff4ef2fbSMiaohe Lin const char *pagemap_proc = "/proc/self/pagemap"; 39ff4ef2fbSMiaohe Lin const char *kpageflags_proc = "/proc/kpageflags"; 40ff4ef2fbSMiaohe Lin 41ff4ef2fbSMiaohe Lin FIXTURE(memory_failure) 42ff4ef2fbSMiaohe Lin { 43ff4ef2fbSMiaohe Lin unsigned long page_size; 44ff4ef2fbSMiaohe Lin unsigned long corrupted_size; 45ff4ef2fbSMiaohe Lin unsigned long pfn; 46ff4ef2fbSMiaohe Lin int pagemap_fd; 47ff4ef2fbSMiaohe Lin int kpageflags_fd; 48ff4ef2fbSMiaohe Lin bool triggered; 49ff4ef2fbSMiaohe Lin }; 50ff4ef2fbSMiaohe Lin 51ff4ef2fbSMiaohe Lin FIXTURE_VARIANT(memory_failure) 52ff4ef2fbSMiaohe Lin { 53ff4ef2fbSMiaohe Lin enum inject_type type; 54ff4ef2fbSMiaohe Lin int (*inject)(FIXTURE_DATA(memory_failure) * self, void *vaddr); 55ff4ef2fbSMiaohe Lin }; 56ff4ef2fbSMiaohe Lin 57ff4ef2fbSMiaohe Lin static int madv_hard_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr) 58ff4ef2fbSMiaohe Lin { 59ff4ef2fbSMiaohe Lin return madvise(vaddr, self->page_size, MADV_HWPOISON); 60ff4ef2fbSMiaohe Lin } 61ff4ef2fbSMiaohe Lin 62ff4ef2fbSMiaohe Lin FIXTURE_VARIANT_ADD(memory_failure, madv_hard) 63ff4ef2fbSMiaohe Lin { 64ff4ef2fbSMiaohe Lin .type = MADV_HARD, 65ff4ef2fbSMiaohe Lin .inject = madv_hard_inject, 66ff4ef2fbSMiaohe Lin }; 67ff4ef2fbSMiaohe Lin 68ff4ef2fbSMiaohe Lin static int madv_soft_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr) 69ff4ef2fbSMiaohe Lin { 70ff4ef2fbSMiaohe Lin return madvise(vaddr, self->page_size, MADV_SOFT_OFFLINE); 71ff4ef2fbSMiaohe Lin } 72ff4ef2fbSMiaohe Lin 73ff4ef2fbSMiaohe Lin FIXTURE_VARIANT_ADD(memory_failure, madv_soft) 74ff4ef2fbSMiaohe Lin { 75ff4ef2fbSMiaohe Lin .type = MADV_SOFT, 76ff4ef2fbSMiaohe Lin .inject = madv_soft_inject, 77ff4ef2fbSMiaohe Lin }; 78ff4ef2fbSMiaohe Lin 79ff4ef2fbSMiaohe Lin static void sigbus_action(int signo, siginfo_t *si, void *args) 80ff4ef2fbSMiaohe Lin { 81ff4ef2fbSMiaohe Lin memcpy(&siginfo, si, sizeof(siginfo_t)); 82ff4ef2fbSMiaohe Lin siglongjmp(signal_jmp_buf, 1); 83ff4ef2fbSMiaohe Lin } 84ff4ef2fbSMiaohe Lin 85ff4ef2fbSMiaohe Lin static int setup_sighandler(void) 86ff4ef2fbSMiaohe Lin { 87ff4ef2fbSMiaohe Lin struct sigaction sa = { 88ff4ef2fbSMiaohe Lin .sa_sigaction = sigbus_action, 89ff4ef2fbSMiaohe Lin .sa_flags = SA_SIGINFO, 90ff4ef2fbSMiaohe Lin }; 91ff4ef2fbSMiaohe Lin 92ff4ef2fbSMiaohe Lin return sigaction(SIGBUS, &sa, NULL); 93ff4ef2fbSMiaohe Lin } 94ff4ef2fbSMiaohe Lin 95ff4ef2fbSMiaohe Lin FIXTURE_SETUP(memory_failure) 96ff4ef2fbSMiaohe Lin { 97ff4ef2fbSMiaohe Lin memset(self, 0, sizeof(*self)); 98ff4ef2fbSMiaohe Lin 99ff4ef2fbSMiaohe Lin self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); 100ff4ef2fbSMiaohe Lin 101ff4ef2fbSMiaohe Lin memset(&siginfo, 0, sizeof(siginfo)); 102ff4ef2fbSMiaohe Lin if (setup_sighandler()) 103ff4ef2fbSMiaohe Lin SKIP(return, "setup sighandler failed.\n"); 104ff4ef2fbSMiaohe Lin 105ff4ef2fbSMiaohe Lin self->pagemap_fd = open(pagemap_proc, O_RDONLY); 106ff4ef2fbSMiaohe Lin if (self->pagemap_fd == -1) 107ff4ef2fbSMiaohe Lin SKIP(return, "open %s failed.\n", pagemap_proc); 108ff4ef2fbSMiaohe Lin 109ff4ef2fbSMiaohe Lin self->kpageflags_fd = open(kpageflags_proc, O_RDONLY); 110ff4ef2fbSMiaohe Lin if (self->kpageflags_fd == -1) 111ff4ef2fbSMiaohe Lin SKIP(return, "open %s failed.\n", kpageflags_proc); 112ff4ef2fbSMiaohe Lin } 113ff4ef2fbSMiaohe Lin 114ff4ef2fbSMiaohe Lin static void teardown_sighandler(void) 115ff4ef2fbSMiaohe Lin { 116ff4ef2fbSMiaohe Lin struct sigaction sa = { 117ff4ef2fbSMiaohe Lin .sa_handler = SIG_DFL, 118ff4ef2fbSMiaohe Lin .sa_flags = SA_SIGINFO, 119ff4ef2fbSMiaohe Lin }; 120ff4ef2fbSMiaohe Lin 121ff4ef2fbSMiaohe Lin sigaction(SIGBUS, &sa, NULL); 122ff4ef2fbSMiaohe Lin } 123ff4ef2fbSMiaohe Lin 124ff4ef2fbSMiaohe Lin FIXTURE_TEARDOWN(memory_failure) 125ff4ef2fbSMiaohe Lin { 126ff4ef2fbSMiaohe Lin close(self->kpageflags_fd); 127ff4ef2fbSMiaohe Lin close(self->pagemap_fd); 128ff4ef2fbSMiaohe Lin teardown_sighandler(); 129ff4ef2fbSMiaohe Lin } 130ff4ef2fbSMiaohe Lin 131ff4ef2fbSMiaohe Lin static void prepare(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self, 132ff4ef2fbSMiaohe Lin void *vaddr) 133ff4ef2fbSMiaohe Lin { 134ff4ef2fbSMiaohe Lin self->pfn = pagemap_get_pfn(self->pagemap_fd, vaddr); 135ff4ef2fbSMiaohe Lin ASSERT_NE(self->pfn, -1UL); 136ff4ef2fbSMiaohe Lin 137ff4ef2fbSMiaohe Lin ASSERT_EQ(get_hardware_corrupted_size(&self->corrupted_size), 0); 138ff4ef2fbSMiaohe Lin } 139ff4ef2fbSMiaohe Lin 140ff4ef2fbSMiaohe Lin static bool check_memory(void *vaddr, unsigned long size) 141ff4ef2fbSMiaohe Lin { 142ff4ef2fbSMiaohe Lin char buf[64]; 143ff4ef2fbSMiaohe Lin 144ff4ef2fbSMiaohe Lin memset(buf, 0xce, sizeof(buf)); 145ff4ef2fbSMiaohe Lin while (size >= sizeof(buf)) { 146ff4ef2fbSMiaohe Lin if (memcmp(vaddr, buf, sizeof(buf))) 147ff4ef2fbSMiaohe Lin return false; 148ff4ef2fbSMiaohe Lin size -= sizeof(buf); 149ff4ef2fbSMiaohe Lin vaddr += sizeof(buf); 150ff4ef2fbSMiaohe Lin } 151ff4ef2fbSMiaohe Lin 152ff4ef2fbSMiaohe Lin return true; 153ff4ef2fbSMiaohe Lin } 154ff4ef2fbSMiaohe Lin 155ff4ef2fbSMiaohe Lin static void check(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self, 156ff4ef2fbSMiaohe Lin void *vaddr, enum result_type type, int setjmp) 157ff4ef2fbSMiaohe Lin { 158ff4ef2fbSMiaohe Lin unsigned long size; 159ff4ef2fbSMiaohe Lin uint64_t pfn_flags; 160ff4ef2fbSMiaohe Lin 161ff4ef2fbSMiaohe Lin switch (type) { 162ff4ef2fbSMiaohe Lin case MADV_SOFT_ANON: 163*12e8a2faSMiaohe Lin case MADV_HARD_CLEAN_PAGECACHE: 164*12e8a2faSMiaohe Lin case MADV_SOFT_CLEAN_PAGECACHE: 165ff4ef2fbSMiaohe Lin /* It is not expected to receive a SIGBUS signal. */ 166ff4ef2fbSMiaohe Lin ASSERT_EQ(setjmp, 0); 167ff4ef2fbSMiaohe Lin 168ff4ef2fbSMiaohe Lin /* The page content should remain unchanged. */ 169ff4ef2fbSMiaohe Lin ASSERT_TRUE(check_memory(vaddr, self->page_size)); 170ff4ef2fbSMiaohe Lin 171ff4ef2fbSMiaohe Lin /* The backing pfn of addr should have changed. */ 172ff4ef2fbSMiaohe Lin ASSERT_NE(pagemap_get_pfn(self->pagemap_fd, vaddr), self->pfn); 173ff4ef2fbSMiaohe Lin break; 174ff4ef2fbSMiaohe Lin case MADV_HARD_ANON: 175ff4ef2fbSMiaohe Lin /* The SIGBUS signal should have been received. */ 176ff4ef2fbSMiaohe Lin ASSERT_EQ(setjmp, 1); 177ff4ef2fbSMiaohe Lin 178ff4ef2fbSMiaohe Lin /* Check if siginfo contains correct SIGBUS context. */ 179ff4ef2fbSMiaohe Lin ASSERT_EQ(siginfo.si_signo, SIGBUS); 180ff4ef2fbSMiaohe Lin ASSERT_EQ(siginfo.si_code, BUS_MCEERR_AR); 181ff4ef2fbSMiaohe Lin ASSERT_EQ(1UL << siginfo.si_addr_lsb, self->page_size); 182ff4ef2fbSMiaohe Lin ASSERT_EQ(siginfo.si_addr, vaddr); 183ff4ef2fbSMiaohe Lin 184ff4ef2fbSMiaohe Lin /* XXX Check backing pte is hwpoison entry when supported. */ 185ff4ef2fbSMiaohe Lin ASSERT_TRUE(pagemap_is_swapped(self->pagemap_fd, vaddr)); 186ff4ef2fbSMiaohe Lin break; 187ff4ef2fbSMiaohe Lin default: 188ff4ef2fbSMiaohe Lin SKIP(return, "unexpected inject type %d.\n", type); 189ff4ef2fbSMiaohe Lin } 190ff4ef2fbSMiaohe Lin 191ff4ef2fbSMiaohe Lin /* Check if the value of HardwareCorrupted has increased. */ 192ff4ef2fbSMiaohe Lin ASSERT_EQ(get_hardware_corrupted_size(&size), 0); 193ff4ef2fbSMiaohe Lin ASSERT_EQ(size, self->corrupted_size + self->page_size / 1024); 194ff4ef2fbSMiaohe Lin 195ff4ef2fbSMiaohe Lin /* Check if HWPoison flag is set. */ 196ff4ef2fbSMiaohe Lin ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0); 197ff4ef2fbSMiaohe Lin ASSERT_EQ(pfn_flags & KPF_HWPOISON, KPF_HWPOISON); 198ff4ef2fbSMiaohe Lin } 199ff4ef2fbSMiaohe Lin 200ff4ef2fbSMiaohe Lin static void cleanup(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self, 201ff4ef2fbSMiaohe Lin void *vaddr) 202ff4ef2fbSMiaohe Lin { 203ff4ef2fbSMiaohe Lin unsigned long size; 204ff4ef2fbSMiaohe Lin uint64_t pfn_flags; 205ff4ef2fbSMiaohe Lin 206ff4ef2fbSMiaohe Lin ASSERT_EQ(unpoison_memory(self->pfn), 0); 207ff4ef2fbSMiaohe Lin 208ff4ef2fbSMiaohe Lin /* Check if HWPoison flag is cleared. */ 209ff4ef2fbSMiaohe Lin ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0); 210ff4ef2fbSMiaohe Lin ASSERT_NE(pfn_flags & KPF_HWPOISON, KPF_HWPOISON); 211ff4ef2fbSMiaohe Lin 212ff4ef2fbSMiaohe Lin /* Check if the value of HardwareCorrupted has decreased. */ 213ff4ef2fbSMiaohe Lin ASSERT_EQ(get_hardware_corrupted_size(&size), 0); 214ff4ef2fbSMiaohe Lin ASSERT_EQ(size, self->corrupted_size); 215ff4ef2fbSMiaohe Lin } 216ff4ef2fbSMiaohe Lin 217ff4ef2fbSMiaohe Lin TEST_F(memory_failure, anon) 218ff4ef2fbSMiaohe Lin { 219ff4ef2fbSMiaohe Lin char *addr; 220ff4ef2fbSMiaohe Lin int ret; 221ff4ef2fbSMiaohe Lin 222ff4ef2fbSMiaohe Lin addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE, 223ff4ef2fbSMiaohe Lin MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); 224ff4ef2fbSMiaohe Lin if (addr == MAP_FAILED) 225ff4ef2fbSMiaohe Lin SKIP(return, "mmap failed, not enough memory.\n"); 226ff4ef2fbSMiaohe Lin memset(addr, 0xce, self->page_size); 227ff4ef2fbSMiaohe Lin 228ff4ef2fbSMiaohe Lin prepare(_metadata, self, addr); 229ff4ef2fbSMiaohe Lin 230ff4ef2fbSMiaohe Lin ret = sigsetjmp(signal_jmp_buf, 1); 231ff4ef2fbSMiaohe Lin if (!self->triggered) { 232ff4ef2fbSMiaohe Lin self->triggered = true; 233ff4ef2fbSMiaohe Lin ASSERT_EQ(variant->inject(self, addr), 0); 234ff4ef2fbSMiaohe Lin FORCE_READ(*addr); 235ff4ef2fbSMiaohe Lin } 236ff4ef2fbSMiaohe Lin 237ff4ef2fbSMiaohe Lin if (variant->type == MADV_HARD) 238ff4ef2fbSMiaohe Lin check(_metadata, self, addr, MADV_HARD_ANON, ret); 239ff4ef2fbSMiaohe Lin else 240ff4ef2fbSMiaohe Lin check(_metadata, self, addr, MADV_SOFT_ANON, ret); 241ff4ef2fbSMiaohe Lin 242ff4ef2fbSMiaohe Lin cleanup(_metadata, self, addr); 243ff4ef2fbSMiaohe Lin 244ff4ef2fbSMiaohe Lin ASSERT_EQ(munmap(addr, self->page_size), 0); 245ff4ef2fbSMiaohe Lin } 246ff4ef2fbSMiaohe Lin 247*12e8a2faSMiaohe Lin /* Borrowed from mm/gup_longterm.c. */ 248*12e8a2faSMiaohe Lin static int get_fs_type(int fd) 249*12e8a2faSMiaohe Lin { 250*12e8a2faSMiaohe Lin struct statfs fs; 251*12e8a2faSMiaohe Lin int ret; 252*12e8a2faSMiaohe Lin 253*12e8a2faSMiaohe Lin do { 254*12e8a2faSMiaohe Lin ret = fstatfs(fd, &fs); 255*12e8a2faSMiaohe Lin } while (ret && errno == EINTR); 256*12e8a2faSMiaohe Lin 257*12e8a2faSMiaohe Lin return ret ? 0 : (int)fs.f_type; 258*12e8a2faSMiaohe Lin } 259*12e8a2faSMiaohe Lin 260*12e8a2faSMiaohe Lin TEST_F(memory_failure, clean_pagecache) 261*12e8a2faSMiaohe Lin { 262*12e8a2faSMiaohe Lin const char *fname = "./clean-page-cache-test-file"; 263*12e8a2faSMiaohe Lin int fd; 264*12e8a2faSMiaohe Lin char *addr; 265*12e8a2faSMiaohe Lin int ret; 266*12e8a2faSMiaohe Lin int fs_type; 267*12e8a2faSMiaohe Lin 268*12e8a2faSMiaohe Lin fd = open(fname, O_RDWR | O_CREAT, 0664); 269*12e8a2faSMiaohe Lin if (fd < 0) 270*12e8a2faSMiaohe Lin SKIP(return, "failed to open test file.\n"); 271*12e8a2faSMiaohe Lin unlink(fname); 272*12e8a2faSMiaohe Lin ftruncate(fd, self->page_size); 273*12e8a2faSMiaohe Lin fs_type = get_fs_type(fd); 274*12e8a2faSMiaohe Lin if (!fs_type || fs_type == TMPFS_MAGIC) 275*12e8a2faSMiaohe Lin SKIP(return, "unsupported filesystem :%x\n", fs_type); 276*12e8a2faSMiaohe Lin 277*12e8a2faSMiaohe Lin addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE, 278*12e8a2faSMiaohe Lin MAP_SHARED, fd, 0); 279*12e8a2faSMiaohe Lin if (addr == MAP_FAILED) 280*12e8a2faSMiaohe Lin SKIP(return, "mmap failed, not enough memory.\n"); 281*12e8a2faSMiaohe Lin memset(addr, 0xce, self->page_size); 282*12e8a2faSMiaohe Lin fsync(fd); 283*12e8a2faSMiaohe Lin 284*12e8a2faSMiaohe Lin prepare(_metadata, self, addr); 285*12e8a2faSMiaohe Lin 286*12e8a2faSMiaohe Lin ret = sigsetjmp(signal_jmp_buf, 1); 287*12e8a2faSMiaohe Lin if (!self->triggered) { 288*12e8a2faSMiaohe Lin self->triggered = true; 289*12e8a2faSMiaohe Lin ASSERT_EQ(variant->inject(self, addr), 0); 290*12e8a2faSMiaohe Lin FORCE_READ(*addr); 291*12e8a2faSMiaohe Lin } 292*12e8a2faSMiaohe Lin 293*12e8a2faSMiaohe Lin if (variant->type == MADV_HARD) 294*12e8a2faSMiaohe Lin check(_metadata, self, addr, MADV_HARD_CLEAN_PAGECACHE, ret); 295*12e8a2faSMiaohe Lin else 296*12e8a2faSMiaohe Lin check(_metadata, self, addr, MADV_SOFT_CLEAN_PAGECACHE, ret); 297*12e8a2faSMiaohe Lin 298*12e8a2faSMiaohe Lin cleanup(_metadata, self, addr); 299*12e8a2faSMiaohe Lin 300*12e8a2faSMiaohe Lin ASSERT_EQ(munmap(addr, self->page_size), 0); 301*12e8a2faSMiaohe Lin 302*12e8a2faSMiaohe Lin ASSERT_EQ(close(fd), 0); 303*12e8a2faSMiaohe Lin } 304*12e8a2faSMiaohe Lin 305ff4ef2fbSMiaohe Lin TEST_HARNESS_MAIN 306