// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->count);
	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->count);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS. If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit. When racing with page_counter_limit(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->count);
		if (new > c->limit) {
			atomic_long_sub(nr_pages, &c->count);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_limit - limit the number of pages allowed
 * @counter: counter
 * @limit: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_limit(struct page_counter *counter, unsigned long limit)
{
	for (;;) {
		unsigned long old;
		long count;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		count = atomic_long_read(&counter->count);

		if (count > limit)
			return -EBUSY;

		old = xchg(&counter->limit, limit);

		if (atomic_long_read(&counter->count) <= count)
			return 0;

		counter->limit = old;
		cond_resched();
	}
}

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
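
/*
 * Illustrative usage sketch (not part of the original file): how a
 * controller might wire two counters into a hierarchy and charge
 * through the child.  The page_counter_init() helper is assumed to
 * be the static inline declared in <linux/page_counter.h>; the
 * example_* names are hypothetical and exist only for this sketch,
 * everything else is the API defined above.
 */
static struct page_counter example_parent, example_child;

static int example_init(void)
{
	page_counter_init(&example_parent, NULL);
	page_counter_init(&example_child, &example_parent);

	/* Cap the child at 512 pages; the parent stays unlimited. */
	return page_counter_limit(&example_child, 512);
}

static int example_charge(unsigned long nr_pages)
{
	struct page_counter *fail;

	/*
	 * A successful charge is accounted in the child and all of
	 * its ancestors; on failure, @fail names the counter whose
	 * limit was hit so the caller can reclaim against it.
	 */
	if (!page_counter_try_charge(&example_child, nr_pages, &fail))
		return -ENOMEM;
	return 0;
}

static void example_uncharge(unsigned long nr_pages)
{
	/* Releases the pages from the child and all of its ancestors. */
	page_counter_uncharge(&example_child, nr_pages);
}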