1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* AFS volume management 3 * 4 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/kernel.h> 9 #include <linux/slab.h> 10 #include "internal.h" 11 12 static unsigned __read_mostly afs_volume_record_life = 60 * 60; 13 14 static void afs_destroy_volume(struct work_struct *work); 15 16 /* 17 * Insert a volume into a cell. If there's an existing volume record, that is 18 * returned instead with a ref held. 19 */ 20 static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell, 21 struct afs_volume *volume) 22 { 23 struct afs_volume *p; 24 struct rb_node *parent = NULL, **pp; 25 26 write_seqlock(&cell->volume_lock); 27 28 pp = &cell->volumes.rb_node; 29 while (*pp) { 30 parent = *pp; 31 p = rb_entry(parent, struct afs_volume, cell_node); 32 if (p->vid < volume->vid) { 33 pp = &(*pp)->rb_left; 34 } else if (p->vid > volume->vid) { 35 pp = &(*pp)->rb_right; 36 } else { 37 if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) { 38 volume = p; 39 goto found; 40 } 41 42 set_bit(AFS_VOLUME_RM_TREE, &volume->flags); 43 rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes); 44 } 45 } 46 47 rb_link_node_rcu(&volume->cell_node, parent, pp); 48 rb_insert_color(&volume->cell_node, &cell->volumes); 49 hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes); 50 51 found: 52 write_sequnlock(&cell->volume_lock); 53 return volume; 54 55 } 56 57 static void afs_remove_volume_from_cell(struct afs_volume *volume) 58 { 59 struct afs_cell *cell = volume->cell; 60 61 if (!hlist_unhashed(&volume->proc_link)) { 62 trace_afs_volume(volume->vid, refcount_read(&cell->ref), 63 afs_volume_trace_remove); 64 write_seqlock(&cell->volume_lock); 65 hlist_del_rcu(&volume->proc_link); 66 if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags)) 67 rb_erase(&volume->cell_node, &cell->volumes); 68 write_sequnlock(&cell->volume_lock); 69 } 70 } 71 72 /* 73 * Allocate a volume record and load it up from a vldb record. 74 */ 75 static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, 76 struct afs_vldb_entry *vldb, 77 struct afs_server_list **_slist) 78 { 79 struct afs_server_list *slist; 80 struct afs_volume *volume; 81 int ret = -ENOMEM, i; 82 83 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); 84 if (!volume) 85 goto error_0; 86 87 volume->vid = vldb->vid[params->type]; 88 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; 89 volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol); 90 volume->type = params->type; 91 volume->type_force = params->force; 92 volume->name_len = vldb->name_len; 93 volume->creation_time = TIME64_MIN; 94 volume->update_time = TIME64_MIN; 95 96 refcount_set(&volume->ref, 1); 97 INIT_HLIST_NODE(&volume->proc_link); 98 INIT_WORK(&volume->destructor, afs_destroy_volume); 99 rwlock_init(&volume->servers_lock); 100 mutex_init(&volume->volsync_lock); 101 mutex_init(&volume->cb_check_lock); 102 rwlock_init(&volume->cb_v_break_lock); 103 INIT_LIST_HEAD(&volume->open_mmaps); 104 init_rwsem(&volume->open_mmaps_lock); 105 memcpy(volume->name, vldb->name, vldb->name_len + 1); 106 107 for (i = 0; i < AFS_MAXTYPES; i++) 108 volume->vids[i] = vldb->vid[i]; 109 110 slist = afs_alloc_server_list(volume, params->key, vldb); 111 if (IS_ERR(slist)) { 112 ret = PTR_ERR(slist); 113 goto error_1; 114 } 115 116 *_slist = slist; 117 rcu_assign_pointer(volume->servers, slist); 118 trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc); 119 return volume; 120 121 error_1: 122 afs_put_cell(volume->cell, afs_cell_trace_put_vol); 123 kfree(volume); 124 error_0: 125 return ERR_PTR(ret); 126 } 127 128 /* 129 * Look up or allocate a volume record. 130 */ 131 static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params, 132 struct afs_vldb_entry *vldb) 133 { 134 struct afs_server_list *slist; 135 struct afs_volume *candidate, *volume; 136 137 candidate = afs_alloc_volume(params, vldb, &slist); 138 if (IS_ERR(candidate)) 139 return candidate; 140 141 volume = afs_insert_volume_into_cell(params->cell, candidate); 142 if (volume == candidate) 143 afs_attach_volume_to_servers(volume, slist); 144 else 145 afs_put_volume(candidate, afs_volume_trace_put_cell_dup); 146 return volume; 147 } 148 149 /* 150 * Look up a VLDB record for a volume. 151 */ 152 static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, 153 struct key *key, 154 const char *volname, 155 size_t volnamesz) 156 { 157 struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ); 158 struct afs_vl_cursor vc; 159 int ret; 160 161 if (!afs_begin_vlserver_operation(&vc, cell, key)) 162 return ERR_PTR(-ERESTARTSYS); 163 164 while (afs_select_vlserver(&vc)) { 165 vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); 166 } 167 168 ret = afs_end_vlserver_operation(&vc); 169 return ret < 0 ? ERR_PTR(ret) : vldb; 170 } 171 172 /* 173 * Look up a volume in the VL server and create a candidate volume record for 174 * it. 175 * 176 * The volume name can be one of the following: 177 * "%[cell:]volume[.]" R/W volume 178 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), 179 * or R/W (rwparent=1) volume 180 * "%[cell:]volume.readonly" R/O volume 181 * "#[cell:]volume.readonly" R/O volume 182 * "%[cell:]volume.backup" Backup volume 183 * "#[cell:]volume.backup" Backup volume 184 * 185 * The cell name is optional, and defaults to the current cell. 186 * 187 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin 188 * Guide 189 * - Rule 1: Explicit type suffix forces access of that type or nothing 190 * (no suffix, then use Rule 2 & 3) 191 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W 192 * if not available 193 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless 194 * explicitly told otherwise 195 */ 196 struct afs_volume *afs_create_volume(struct afs_fs_context *params) 197 { 198 struct afs_vldb_entry *vldb; 199 struct afs_volume *volume; 200 unsigned long type_mask = 1UL << params->type; 201 202 vldb = afs_vl_lookup_vldb(params->cell, params->key, 203 params->volname, params->volnamesz); 204 if (IS_ERR(vldb)) 205 return ERR_CAST(vldb); 206 207 if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) { 208 volume = ERR_PTR(vldb->error); 209 goto error; 210 } 211 212 /* Make the final decision on the type we want */ 213 volume = ERR_PTR(-ENOMEDIUM); 214 if (params->force) { 215 if (!(vldb->flags & type_mask)) 216 goto error; 217 } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) { 218 params->type = AFSVL_ROVOL; 219 } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) { 220 params->type = AFSVL_RWVOL; 221 } else { 222 goto error; 223 } 224 225 volume = afs_lookup_volume(params, vldb); 226 227 error: 228 kfree(vldb); 229 return volume; 230 } 231 232 /* 233 * Destroy a volume record 234 */ 235 static void afs_destroy_volume(struct work_struct *work) 236 { 237 struct afs_volume *volume = container_of(work, struct afs_volume, destructor); 238 struct afs_server_list *slist = rcu_access_pointer(volume->servers); 239 240 _enter("%p", volume); 241 242 #ifdef CONFIG_AFS_FSCACHE 243 ASSERTCMP(volume->cache, ==, NULL); 244 #endif 245 246 afs_detach_volume_from_servers(volume, slist); 247 afs_remove_volume_from_cell(volume); 248 afs_put_serverlist(volume->cell->net, slist); 249 afs_put_cell(volume->cell, afs_cell_trace_put_vol); 250 trace_afs_volume(volume->vid, refcount_read(&volume->ref), 251 afs_volume_trace_free); 252 kfree_rcu(volume, rcu); 253 254 _leave(" [destroyed]"); 255 } 256 257 /* 258 * Try to get a reference on a volume record. 259 */ 260 bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason) 261 { 262 int r; 263 264 if (__refcount_inc_not_zero(&volume->ref, &r)) { 265 trace_afs_volume(volume->vid, r + 1, reason); 266 return true; 267 } 268 return false; 269 } 270 271 /* 272 * Get a reference on a volume record. 273 */ 274 struct afs_volume *afs_get_volume(struct afs_volume *volume, 275 enum afs_volume_trace reason) 276 { 277 if (volume) { 278 int r; 279 280 __refcount_inc(&volume->ref, &r); 281 trace_afs_volume(volume->vid, r + 1, reason); 282 } 283 return volume; 284 } 285 286 287 /* 288 * Drop a reference on a volume record. 289 */ 290 void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason) 291 { 292 if (volume) { 293 afs_volid_t vid = volume->vid; 294 bool zero; 295 int r; 296 297 zero = __refcount_dec_and_test(&volume->ref, &r); 298 trace_afs_volume(vid, r - 1, reason); 299 if (zero) 300 schedule_work(&volume->destructor); 301 } 302 } 303 304 /* 305 * Activate a volume. 306 */ 307 int afs_activate_volume(struct afs_volume *volume) 308 { 309 #ifdef CONFIG_AFS_FSCACHE 310 struct fscache_volume *vcookie; 311 char *name; 312 313 name = kasprintf(GFP_KERNEL, "afs,%s,%llx", 314 volume->cell->name, volume->vid); 315 if (!name) 316 return -ENOMEM; 317 318 vcookie = fscache_acquire_volume(name, NULL, NULL, 0); 319 if (IS_ERR(vcookie)) { 320 if (vcookie != ERR_PTR(-EBUSY)) { 321 kfree(name); 322 return PTR_ERR(vcookie); 323 } 324 pr_err("AFS: Cache volume key already in use (%s)\n", name); 325 vcookie = NULL; 326 } 327 volume->cache = vcookie; 328 kfree(name); 329 #endif 330 return 0; 331 } 332 333 /* 334 * Deactivate a volume. 335 */ 336 void afs_deactivate_volume(struct afs_volume *volume) 337 { 338 _enter("%s", volume->name); 339 340 #ifdef CONFIG_AFS_FSCACHE 341 fscache_relinquish_volume(volume->cache, NULL, 342 test_bit(AFS_VOLUME_DELETED, &volume->flags)); 343 volume->cache = NULL; 344 #endif 345 346 _leave(""); 347 } 348 349 /* 350 * Query the VL service to update the volume status. 351 */ 352 static int afs_update_volume_status(struct afs_volume *volume, struct key *key) 353 { 354 struct afs_server_list *new, *old, *discard; 355 struct afs_vldb_entry *vldb; 356 char idbuf[16]; 357 int ret, idsz; 358 359 _enter(""); 360 361 /* We look up an ID by passing it as a decimal string in the 362 * operation's name parameter. 363 */ 364 idsz = sprintf(idbuf, "%llu", volume->vid); 365 366 vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); 367 if (IS_ERR(vldb)) { 368 ret = PTR_ERR(vldb); 369 goto error; 370 } 371 372 /* See if the volume got renamed. */ 373 if (vldb->name_len != volume->name_len || 374 memcmp(vldb->name, volume->name, vldb->name_len) != 0) { 375 /* TODO: Use RCU'd string. */ 376 memcpy(volume->name, vldb->name, AFS_MAXVOLNAME); 377 volume->name_len = vldb->name_len; 378 } 379 380 /* See if the volume's server list got updated. */ 381 new = afs_alloc_server_list(volume, key, vldb); 382 if (IS_ERR(new)) { 383 ret = PTR_ERR(new); 384 goto error_vldb; 385 } 386 387 write_lock(&volume->servers_lock); 388 389 discard = new; 390 old = rcu_dereference_protected(volume->servers, 391 lockdep_is_held(&volume->servers_lock)); 392 if (afs_annotate_server_list(new, old)) { 393 new->seq = volume->servers_seq + 1; 394 rcu_assign_pointer(volume->servers, new); 395 smp_wmb(); 396 volume->servers_seq++; 397 discard = old; 398 } 399 400 /* Check more often if replication is ongoing. */ 401 if (new->ro_replicating) 402 volume->update_at = ktime_get_real_seconds() + 10 * 60; 403 else 404 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; 405 write_unlock(&volume->servers_lock); 406 407 if (discard == old) 408 afs_reattach_volume_to_servers(volume, new, old); 409 afs_put_serverlist(volume->cell->net, discard); 410 ret = 0; 411 error_vldb: 412 kfree(vldb); 413 error: 414 _leave(" = %d", ret); 415 return ret; 416 } 417 418 /* 419 * Make sure the volume record is up to date. 420 */ 421 int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op) 422 { 423 int ret, retries = 0; 424 425 _enter(""); 426 427 retry: 428 if (test_bit(AFS_VOLUME_WAIT, &volume->flags)) 429 goto wait; 430 if (volume->update_at <= ktime_get_real_seconds() || 431 test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags)) 432 goto update; 433 _leave(" = 0"); 434 return 0; 435 436 update: 437 if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) { 438 clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); 439 ret = afs_update_volume_status(volume, op->key); 440 if (ret < 0) 441 set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); 442 clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags); 443 clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags); 444 wake_up_bit(&volume->flags, AFS_VOLUME_WAIT); 445 _leave(" = %d", ret); 446 return ret; 447 } 448 449 wait: 450 if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { 451 _leave(" = 0 [no wait]"); 452 return 0; 453 } 454 455 ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, 456 (op->flags & AFS_OPERATION_UNINTR) ? 457 TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); 458 if (ret == -ERESTARTSYS) { 459 _leave(" = %d", ret); 460 return ret; 461 } 462 463 retries++; 464 if (retries == 4) { 465 _leave(" = -ESTALE"); 466 return -ESTALE; 467 } 468 goto retry; 469 } 470