// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

static void propagate_protected_usage(struct page_counter *c,
				      unsigned long usage)
{
	unsigned long protected, old_protected;
	unsigned long low, min;
	long delta;

	if (!c->parent)
		return;

	min = READ_ONCE(c->min);
	if (min || atomic_long_read(&c->min_usage)) {
		protected = min(usage, min);
		old_protected = atomic_long_xchg(&c->min_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_min_usage);
	}

	low = READ_ONCE(c->low);
	if (low || atomic_long_read(&c->low_usage)) {
		protected = min(usage, low);
		old_protected = atomic_long_xchg(&c->low_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_low_usage);
	}
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->usage);
	propagate_protected_usage(counter, new);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->usage);
		propagate_protected_usage(c, new);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}
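
/*
 * Illustrative usage sketch (comment only, not compiled): how the
 * hierarchical walk in page_counter_charge() behaves for a two-level
 * hierarchy.  The names "root" and "child" are hypothetical;
 * page_counter_init() is the initializer from <linux/page_counter.h>.
 *
 *	struct page_counter root, child;
 *
 *	page_counter_init(&root, NULL);
 *	page_counter_init(&child, &root);
 *
 *	page_counter_charge(&child, 4);
 *
 * Afterwards both child.usage and root.usage read 4: the loop follows
 * c->parent until it reaches a counter with no parent, and each level
 * records its own watermark.
 */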

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_set_max(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->usage);
		if (new > c->max) {
			atomic_long_sub(nr_pages, &c->usage);
			propagate_protected_usage(c, new);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		propagate_protected_usage(c, new);
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
	for (;;) {
		unsigned long old;
		long usage;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		usage = atomic_long_read(&counter->usage);

		if (usage > nr_pages)
			return -EBUSY;

		old = xchg(&counter->max, nr_pages);

		if (atomic_long_read(&counter->usage) <= usage)
			return 0;

		counter->max = old;
		cond_resched();
	}
}

/**
 * page_counter_set_min - set the amount of hard-protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->min, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of best-effort protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->low, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}
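
/*
 * Illustrative sketch (comment only, not compiled) of the usual
 * charge/uncharge pattern against a configured limit.  "group" is a
 * hypothetical, already-initialized counter; everything else is
 * defined in this file.
 *
 *	struct page_counter *fail;
 *
 *	if (page_counter_set_max(group, 1024))
 *		return -EBUSY;	// usage already above the new limit
 *
 *	if (!page_counter_try_charge(group, nr_pages, &fail)) {
 *		// @fail is the counter (this one or an ancestor)
 *		// whose ->max was hit; the partial charge has already
 *		// been rolled back, so the caller may reclaim and
 *		// retry, or give up.
 *		return -ENOMEM;
 *	}
 *	...
 *	page_counter_uncharge(group, nr_pages);
 */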

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
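
/*
 * Illustrative sketch (comment only, not compiled): how a control-file
 * write handler might use page_counter_memparse().  The "max" keyword
 * convention mirrors the cgroup v2 memory interface; "counter" and
 * "buf" are hypothetical arguments of such a handler.
 *
 *	unsigned long nr_pages;
 *	int err;
 *
 *	err = page_counter_memparse(buf, "max", &nr_pages);
 *	if (err)
 *		return err;	// -EINVAL on trailing garbage
 *
 *	err = page_counter_set_max(counter, nr_pages);
 *
 * Writing "max" yields PAGE_COUNTER_MAX, i.e. effectively no limit;
 * any other value is parsed with memparse() (so suffixes like "128M"
 * work) and converted from bytes to pages.
 */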