// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2018 Intel Corporation. All rights reserved.

#include <linux/mm.h>
#include <linux/init.h>
#include <linux/mmzone.h>
#include <linux/random.h>
#include <linux/moduleparam.h>
#include "internal.h"
#include "shuffle.h"

DEFINE_STATIC_KEY_FALSE(page_alloc_shuffle_key);

static bool shuffle_param;
static int shuffle_show(char *buffer, const struct kernel_param *kp)
{
        return sprintf(buffer, "%c\n", shuffle_param ? 'Y' : 'N');
}

static __meminit int shuffle_store(const char *val,
                const struct kernel_param *kp)
{
        int rc = param_set_bool(val, kp);

        if (rc < 0)
                return rc;
        if (shuffle_param)
                static_branch_enable(&page_alloc_shuffle_key);
        return 0;
}
module_param_call(shuffle, shuffle_store, shuffle_show, &shuffle_param, 0400);

/*
 * For two pages to be swapped in the shuffle, they must be free (on a
 * 'free_area' lru), have the same order, and have the same migratetype.
 */
static struct page * __meminit shuffle_valid_page(struct zone *zone,
                                                  unsigned long pfn, int order)
{
        struct page *page = pfn_to_online_page(pfn);

        /*
         * Given we're dealing with randomly selected pfns in a zone we
         * need to ask questions like...
         */

        /* ... is the page managed by the buddy? */
        if (!page)
                return NULL;

        /* ... is the page assigned to the same zone? */
        if (page_zone(page) != zone)
                return NULL;

        /* ...is the page free and currently on a free_area list? */
        if (!PageBuddy(page))
                return NULL;

        /*
         * ...is the page on the same list as the page we will
         * shuffle it with?
         */
        if (buddy_order(page) != order)
                return NULL;

        return page;
}

/*
 * Fisher-Yates shuffle the freelist which prescribes iterating through an
 * array, pfns in this case, and randomly swapping each entry with another in
 * the span, end_pfn - start_pfn.
 *
 * To keep the implementation simple it does not attempt to correct for
 * sources of bias in the distribution, like modulo bias or pseudo-random
 * number generator bias. I.e. the expectation is that this shuffling raises
 * the bar for attacks that exploit the predictability of page allocations,
 * but need not be a perfect shuffle.
 */
#define SHUFFLE_RETRY 10
void __meminit __shuffle_zone(struct zone *z)
{
        unsigned long i, flags;
        unsigned long start_pfn = z->zone_start_pfn;
        unsigned long end_pfn = zone_end_pfn(z);
        const int order = SHUFFLE_ORDER;
        const int order_pages = 1 << order;

        spin_lock_irqsave(&z->lock, flags);
        start_pfn = ALIGN(start_pfn, order_pages);
        for (i = start_pfn; i < end_pfn; i += order_pages) {
                unsigned long j;
                int migratetype, retry;
                struct page *page_i, *page_j;

                /*
                 * We expect page_i, in the sub-range of a zone being added
                 * (@start_pfn to @end_pfn), to more likely be valid compared
                 * to page_j randomly selected in the span @zone_start_pfn to
                 * @spanned_pages.
                 */
                page_i = shuffle_valid_page(z, i, order);
                if (!page_i)
                        continue;

                for (retry = 0; retry < SHUFFLE_RETRY; retry++) {
                        /*
                         * Pick a random order aligned page in the zone span as
                         * a swap target. If the selected pfn is a hole, retry
                         * up to SHUFFLE_RETRY attempts to find a random valid
                         * pfn in the zone.
                         */
                        j = z->zone_start_pfn +
                                ALIGN_DOWN(get_random_long() % z->spanned_pages,
                                           order_pages);
                        page_j = shuffle_valid_page(z, j, order);
                        if (page_j && page_j != page_i)
                                break;
                }
                if (retry >= SHUFFLE_RETRY) {
                        pr_debug("%s: failed to swap %#lx\n", __func__, i);
                        continue;
                }

                /*
                 * Each migratetype corresponds to its own list, make sure the
                 * types match otherwise we're moving pages to lists where they
                 * do not belong.
                 */
                migratetype = get_pageblock_migratetype(page_i);
                if (get_pageblock_migratetype(page_j) != migratetype) {
                        pr_debug("%s: migratetype mismatch %#lx\n", __func__, i);
                        continue;
                }

                list_swap(&page_i->lru, &page_j->lru);

                pr_debug("%s: swap: %#lx -> %#lx\n", __func__, i, j);

                /* take it easy on the zone lock */
                if ((i % (100 * order_pages)) == 0) {
                        spin_unlock_irqrestore(&z->lock, flags);
                        cond_resched();
                        spin_lock_irqsave(&z->lock, flags);
                }
        }
        spin_unlock_irqrestore(&z->lock, flags);
}

/**
 * shuffle_free_memory - reduce the predictability of the page allocator
 * @pgdat: node page data
 */
void __meminit __shuffle_free_memory(pg_data_t *pgdat)
{
        struct zone *z;

        for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
                shuffle_zone(z);
}

bool shuffle_pick_tail(void)
{
        static u64 rand;
        static u8 rand_bits;
        bool ret;

        /*
         * The lack of locking is deliberate. If 2 threads race to
         * update the rand state it just adds to the entropy.
         */
        if (rand_bits == 0) {
                rand_bits = 64;
                rand = get_random_u64();
        }

        ret = rand & 1;

        rand_bits--;
        rand >>= 1;

        return ret;
}
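
/*
 * Illustrative sketch only (compiled out via #if 0, not part of this file):
 * one way a buddy free path could consume shuffle_pick_tail() so that a freed
 * page lands on a random end of its freelist. is_shuffle_order() comes from
 * "shuffle.h"; the helper names shuffle_aware_free(), add_to_free_list() and
 * add_to_free_list_tail() are assumed here for illustration of the intended
 * usage, not taken from this file.
 */
#if 0
static inline void shuffle_aware_free(struct zone *zone, struct page *page,
                                      unsigned int order, int migratetype)
{
        /*
         * For orders covered by the shuffle, flip a cheap cached-random-bit
         * coin so freed pages are not always reallocated in LIFO order.
         */
        if (is_shuffle_order(order) && shuffle_pick_tail())
                add_to_free_list_tail(page, zone, order, migratetype);
        else
                add_to_free_list(page, zone, order, migratetype);
}
#endif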