1 /* 2 * This file is part of UBIFS. 3 * 4 * Copyright (C) 2006-2008 Nokia Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published by 8 * the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program; if not, write to the Free Software Foundation, Inc., 51 17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * 19 * Authors: Artem Bityutskiy (Битюцкий Артём) 20 * Adrian Hunter 21 */ 22 23 /* 24 * This file implements UBIFS shrinker which evicts clean znodes from the TNC 25 * tree when Linux VM needs more RAM. 26 * 27 * We do not implement any LRU lists to find oldest znodes to free because it 28 * would add additional overhead to the file system fast paths. So the shrinker 29 * just walks the TNC tree when searching for znodes to free. 30 * 31 * If the root of a TNC sub-tree is clean and old enough, then the children are 32 * also clean and old enough. So the shrinker walks the TNC in level order and 33 * dumps entire sub-trees. 34 * 35 * The age of znodes is just the time-stamp when they were last looked at. 36 * The current shrinker first tries to evict old znodes, then young ones. 37 * 38 * Since the shrinker is global, it has to protect against races with FS 39 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. 40 */ 41 42 #include "ubifs.h" 43 44 /* List of all UBIFS file-system instances */ 45 LIST_HEAD(ubifs_infos); 46 47 /* 48 * We number each shrinker run and record the number on the ubifs_info structure 49 * so that we can easily work out which ubifs_info structures have already been 50 * done by the current run. 51 */ 52 static unsigned int shrinker_run_no; 53 54 /* Protects 'ubifs_infos' list */ 55 DEFINE_SPINLOCK(ubifs_infos_lock); 56 57 /* Global clean znode counter (for all mounted UBIFS instances) */ 58 atomic_long_t ubifs_clean_zn_cnt; 59 60 /** 61 * shrink_tnc - shrink TNC tree. 62 * @c: UBIFS file-system description object 63 * @nr: number of znodes to free 64 * @age: the age of znodes to free 65 * @contention: if any contention, this is set to %1 66 * 67 * This function traverses TNC tree and frees clean znodes. It does not free 68 * clean znodes which younger then @age. Returns number of freed znodes. 69 */ 70 static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) 71 { 72 int total_freed = 0; 73 struct ubifs_znode *znode, *zprev; 74 int time = get_seconds(); 75 76 ubifs_assert(mutex_is_locked(&c->umount_mutex)); 77 ubifs_assert(mutex_is_locked(&c->tnc_mutex)); 78 79 if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) 80 return 0; 81 82 /* 83 * Traverse the TNC tree in levelorder manner, so that it is possible 84 * to destroy large sub-trees. Indeed, if a znode is old, then all its 85 * children are older or of the same age. 86 * 87 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the 88 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is 89 * changed only when the 'c->tnc_mutex' is held. 90 */ 91 zprev = NULL; 92 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); 93 while (znode && total_freed < nr && 94 atomic_long_read(&c->clean_zn_cnt) > 0) { 95 int freed; 96 97 /* 98 * If the znode is clean, but it is in the 'c->cnext' list, this 99 * means that this znode has just been written to flash as a 100 * part of commit and was marked clean. They will be removed 101 * from the list at end commit. We cannot change the list, 102 * because it is not protected by any mutex (design decision to 103 * make commit really independent and parallel to main I/O). So 104 * we just skip these znodes. 105 * 106 * Note, the 'clean_zn_cnt' counters are not updated until 107 * after the commit, so the UBIFS shrinker does not report 108 * the znodes which are in the 'c->cnext' list as freeable. 109 * 110 * Also note, if the root of a sub-tree is not in 'c->cnext', 111 * then the whole sub-tree is not in 'c->cnext' as well, so it 112 * is safe to dump whole sub-tree. 113 */ 114 115 if (znode->cnext) { 116 /* 117 * Very soon these znodes will be removed from the list 118 * and become freeable. 119 */ 120 *contention = 1; 121 } else if (!ubifs_zn_dirty(znode) && 122 abs(time - znode->time) >= age) { 123 if (znode->parent) 124 znode->parent->zbranch[znode->iip].znode = NULL; 125 else 126 c->zroot.znode = NULL; 127 128 freed = ubifs_destroy_tnc_subtree(znode); 129 atomic_long_sub(freed, &ubifs_clean_zn_cnt); 130 atomic_long_sub(freed, &c->clean_zn_cnt); 131 total_freed += freed; 132 znode = zprev; 133 } 134 135 if (unlikely(!c->zroot.znode)) 136 break; 137 138 zprev = znode; 139 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); 140 cond_resched(); 141 } 142 143 return total_freed; 144 } 145 146 /** 147 * shrink_tnc_trees - shrink UBIFS TNC trees. 148 * @nr: number of znodes to free 149 * @age: the age of znodes to free 150 * @contention: if any contention, this is set to %1 151 * 152 * This function walks the list of mounted UBIFS file-systems and frees clean 153 * znodes which are older than @age, until at least @nr znodes are freed. 154 * Returns the number of freed znodes. 155 */ 156 static int shrink_tnc_trees(int nr, int age, int *contention) 157 { 158 struct ubifs_info *c; 159 struct list_head *p; 160 unsigned int run_no; 161 int freed = 0; 162 163 spin_lock(&ubifs_infos_lock); 164 do { 165 run_no = ++shrinker_run_no; 166 } while (run_no == 0); 167 /* Iterate over all mounted UBIFS file-systems and try to shrink them */ 168 p = ubifs_infos.next; 169 while (p != &ubifs_infos) { 170 c = list_entry(p, struct ubifs_info, infos_list); 171 /* 172 * We move the ones we do to the end of the list, so we stop 173 * when we see one we have already done. 174 */ 175 if (c->shrinker_run_no == run_no) 176 break; 177 if (!mutex_trylock(&c->umount_mutex)) { 178 /* Some un-mount is in progress, try next FS */ 179 *contention = 1; 180 p = p->next; 181 continue; 182 } 183 /* 184 * We're holding 'c->umount_mutex', so the file-system won't go 185 * away. 186 */ 187 if (!mutex_trylock(&c->tnc_mutex)) { 188 mutex_unlock(&c->umount_mutex); 189 *contention = 1; 190 p = p->next; 191 continue; 192 } 193 spin_unlock(&ubifs_infos_lock); 194 /* 195 * OK, now we have TNC locked, the file-system cannot go away - 196 * it is safe to reap the cache. 197 */ 198 c->shrinker_run_no = run_no; 199 freed += shrink_tnc(c, nr, age, contention); 200 mutex_unlock(&c->tnc_mutex); 201 spin_lock(&ubifs_infos_lock); 202 /* Get the next list element before we move this one */ 203 p = p->next; 204 /* 205 * Move this one to the end of the list to provide some 206 * fairness. 207 */ 208 list_move_tail(&c->infos_list, &ubifs_infos); 209 mutex_unlock(&c->umount_mutex); 210 if (freed >= nr) 211 break; 212 } 213 spin_unlock(&ubifs_infos_lock); 214 return freed; 215 } 216 217 /** 218 * kick_a_thread - kick a background thread to start commit. 219 * 220 * This function kicks a background thread to start background commit. Returns 221 * %-1 if a thread was kicked or there is another reason to assume the memory 222 * will soon be freed or become freeable. If there are no dirty znodes, returns 223 * %0. 224 */ 225 static int kick_a_thread(void) 226 { 227 int i; 228 struct ubifs_info *c; 229 230 /* 231 * Iterate over all mounted UBIFS file-systems and find out if there is 232 * already an ongoing commit operation there. If no, then iterate for 233 * the second time and initiate background commit. 234 */ 235 spin_lock(&ubifs_infos_lock); 236 for (i = 0; i < 2; i++) { 237 list_for_each_entry(c, &ubifs_infos, infos_list) { 238 long dirty_zn_cnt; 239 240 if (!mutex_trylock(&c->umount_mutex)) { 241 /* 242 * Some un-mount is in progress, it will 243 * certainly free memory, so just return. 244 */ 245 spin_unlock(&ubifs_infos_lock); 246 return -1; 247 } 248 249 dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); 250 251 if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || 252 c->ro_mount || c->ro_error) { 253 mutex_unlock(&c->umount_mutex); 254 continue; 255 } 256 257 if (c->cmt_state != COMMIT_RESTING) { 258 spin_unlock(&ubifs_infos_lock); 259 mutex_unlock(&c->umount_mutex); 260 return -1; 261 } 262 263 if (i == 1) { 264 list_move_tail(&c->infos_list, &ubifs_infos); 265 spin_unlock(&ubifs_infos_lock); 266 267 ubifs_request_bg_commit(c); 268 mutex_unlock(&c->umount_mutex); 269 return -1; 270 } 271 mutex_unlock(&c->umount_mutex); 272 } 273 } 274 spin_unlock(&ubifs_infos_lock); 275 276 return 0; 277 } 278 279 unsigned long ubifs_shrink_count(struct shrinker *shrink, 280 struct shrink_control *sc) 281 { 282 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 283 284 /* 285 * Due to the way UBIFS updates the clean znode counter it may 286 * temporarily be negative. 287 */ 288 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; 289 } 290 291 unsigned long ubifs_shrink_scan(struct shrinker *shrink, 292 struct shrink_control *sc) 293 { 294 unsigned long nr = sc->nr_to_scan; 295 int contention = 0; 296 unsigned long freed; 297 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 298 299 if (!clean_zn_cnt) { 300 /* 301 * No clean znodes, nothing to reap. All we can do in this case 302 * is to kick background threads to start commit, which will 303 * probably make clean znodes which, in turn, will be freeable. 304 * And we return -1 which means will make VM call us again 305 * later. 306 */ 307 dbg_tnc("no clean znodes, kick a thread"); 308 return kick_a_thread(); 309 } 310 311 freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); 312 if (freed >= nr) 313 goto out; 314 315 dbg_tnc("not enough old znodes, try to free young ones"); 316 freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); 317 if (freed >= nr) 318 goto out; 319 320 dbg_tnc("not enough young znodes, free all"); 321 freed += shrink_tnc_trees(nr - freed, 0, &contention); 322 323 if (!freed && contention) { 324 dbg_tnc("freed nothing, but contention"); 325 return SHRINK_STOP; 326 } 327 328 out: 329 dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); 330 return freed; 331 } 332