xref: /linux/fs/afs/volume.c (revision c537b994505099b7197e7d3125b942ecbcc51eb6)
1 /* volume.c: AFS volume management
2  *
3  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/module.h>
14 #include <linux/init.h>
15 #include <linux/slab.h>
16 #include <linux/fs.h>
17 #include <linux/pagemap.h>
18 #include "volume.h"
19 #include "vnode.h"
20 #include "cell.h"
21 #include "cache.h"
22 #include "cmservice.h"
23 #include "fsclient.h"
24 #include "vlclient.h"
25 #include "internal.h"
26 
#ifdef __KDEBUG
/* printable names for the volume types, used only in debugging output and
 * indexed by the volume's type field (see the _debug() call at the end of
 * afs_volume_lookup()); the table is never modified, so both the strings and
 * the array of pointers are const */
static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
#endif
30 
#ifdef AFS_CACHING_SUPPORT
/* callbacks referenced by the index definition below; their bodies appear
 * later in this file */
static cachefs_match_val_t afs_volume_cache_match(void *target,
						  const void *entry);
static void afs_volume_cache_update(void *source, void *entry);

/*
 * cache index definition for volume records
 * - each index entry is sized to hold a struct afs_cache_vhash
 * - entries are matched and refreshed through the two callbacks declared
 *   above
 * - NOTE(review): the two keys look like one-unit binary keys; confirm the
 *   meaning of each { type, length } pair against the cachefs API
 */
struct cachefs_index_def afs_volume_cache_index_def = {
	.name		= "volume",
	.data_size	= sizeof(struct afs_cache_vhash),
	.keys[0]	= { CACHEFS_INDEX_KEYS_BIN, 1 },
	.keys[1]	= { CACHEFS_INDEX_KEYS_BIN, 1 },
	.match		= afs_volume_cache_match,
	.update		= afs_volume_cache_update,
};
#endif
45 
46 /*****************************************************************************/
47 /*
48  * lookup a volume by name
49  * - this can be one of the following:
50  *	"%[cell:]volume[.]"		R/W volume
51  *	"#[cell:]volume[.]"		R/O or R/W volume (rwparent=0),
52  *					 or R/W (rwparent=1) volume
53  *	"%[cell:]volume.readonly"	R/O volume
54  *	"#[cell:]volume.readonly"	R/O volume
55  *	"%[cell:]volume.backup"		Backup volume
56  *	"#[cell:]volume.backup"		Backup volume
57  *
58  * The cell name is optional, and defaults to the current cell.
59  *
60  * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
61  * Guide
62  * - Rule 1: Explicit type suffix forces access of that type or nothing
63  *           (no suffix, then use Rule 2 & 3)
64  * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
65  *           if not available
66  * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
67  *           explicitly told otherwise
68  */
69 int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
70 		      struct afs_volume **_volume)
71 {
72 	struct afs_vlocation *vlocation = NULL;
73 	struct afs_volume *volume = NULL;
74 	afs_voltype_t type;
75 	const char *cellname, *volname, *suffix;
76 	char srvtmask;
77 	int force, ret, loop, cellnamesz, volnamesz;
78 
79 	_enter("%s,,%d,", name, rwpath);
80 
81 	if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) {
82 		printk("kAFS: unparsable volume name\n");
83 		return -EINVAL;
84 	}
85 
86 	/* determine the type of volume we're looking for */
87 	force = 0;
88 	type = AFSVL_ROVOL;
89 
90 	if (rwpath || name[0] == '%') {
91 		type = AFSVL_RWVOL;
92 		force = 1;
93 	}
94 
95 	suffix = strrchr(name, '.');
96 	if (suffix) {
97 		if (strcmp(suffix, ".readonly") == 0) {
98 			type = AFSVL_ROVOL;
99 			force = 1;
100 		}
101 		else if (strcmp(suffix, ".backup") == 0) {
102 			type = AFSVL_BACKVOL;
103 			force = 1;
104 		}
105 		else if (suffix[1] == 0) {
106 		}
107 		else {
108 			suffix = NULL;
109 		}
110 	}
111 
112 	/* split the cell and volume names */
113 	name++;
114 	volname = strchr(name, ':');
115 	if (volname) {
116 		cellname = name;
117 		cellnamesz = volname - name;
118 		volname++;
119 	}
120 	else {
121 		volname = name;
122 		cellname = NULL;
123 		cellnamesz = 0;
124 	}
125 
126 	volnamesz = suffix ? suffix - volname : strlen(volname);
127 
128 	_debug("CELL:%*.*s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
129 	       cellnamesz, cellnamesz, cellname ?: "", cell,
130 	       volnamesz, volnamesz, volname, suffix ?: "-",
131 	       type,
132 	       force ? " FORCE" : "");
133 
134 	/* lookup the cell record */
135 	if (cellname || !cell) {
136 		ret = afs_cell_lookup(cellname, cellnamesz, &cell);
137 		if (ret<0) {
138 			printk("kAFS: unable to lookup cell '%s'\n",
139 			       cellname ?: "");
140 			goto error;
141 		}
142 	}
143 	else {
144 		afs_get_cell(cell);
145 	}
146 
147 	/* lookup the volume location record */
148 	ret = afs_vlocation_lookup(cell, volname, volnamesz, &vlocation);
149 	if (ret < 0)
150 		goto error;
151 
152 	/* make the final decision on the type we want */
153 	ret = -ENOMEDIUM;
154 	if (force && !(vlocation->vldb.vidmask & (1 << type)))
155 		goto error;
156 
157 	srvtmask = 0;
158 	for (loop = 0; loop < vlocation->vldb.nservers; loop++)
159 		srvtmask |= vlocation->vldb.srvtmask[loop];
160 
161 	if (force) {
162 		if (!(srvtmask & (1 << type)))
163 			goto error;
164 	}
165 	else if (srvtmask & AFS_VOL_VTM_RO) {
166 		type = AFSVL_ROVOL;
167 	}
168 	else if (srvtmask & AFS_VOL_VTM_RW) {
169 		type = AFSVL_RWVOL;
170 	}
171 	else {
172 		goto error;
173 	}
174 
175 	down_write(&cell->vl_sem);
176 
177 	/* is the volume already active? */
178 	if (vlocation->vols[type]) {
179 		/* yes - re-use it */
180 		volume = vlocation->vols[type];
181 		afs_get_volume(volume);
182 		goto success;
183 	}
184 
185 	/* create a new volume record */
186 	_debug("creating new volume record");
187 
188 	ret = -ENOMEM;
189 	volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
190 	if (!volume)
191 		goto error_up;
192 
193 	atomic_set(&volume->usage, 1);
194 	volume->type		= type;
195 	volume->type_force	= force;
196 	volume->cell		= cell;
197 	volume->vid		= vlocation->vldb.vid[type];
198 
199 	init_rwsem(&volume->server_sem);
200 
201 	/* look up all the applicable server records */
202 	for (loop = 0; loop < 8; loop++) {
203 		if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
204 			ret = afs_server_lookup(
205 				volume->cell,
206 				&vlocation->vldb.servers[loop],
207 				&volume->servers[volume->nservers]);
208 			if (ret < 0)
209 				goto error_discard;
210 
211 			volume->nservers++;
212 		}
213 	}
214 
215 	/* attach the cache and volume location */
216 #ifdef AFS_CACHING_SUPPORT
217 	cachefs_acquire_cookie(vlocation->cache,
218 			       &afs_vnode_cache_index_def,
219 			       volume,
220 			       &volume->cache);
221 #endif
222 
223 	afs_get_vlocation(vlocation);
224 	volume->vlocation = vlocation;
225 
226 	vlocation->vols[type] = volume;
227 
228  success:
229 	_debug("kAFS selected %s volume %08x",
230 	       afs_voltypes[volume->type], volume->vid);
231 	*_volume = volume;
232 	ret = 0;
233 
234 	/* clean up */
235  error_up:
236 	up_write(&cell->vl_sem);
237  error:
238 	afs_put_vlocation(vlocation);
239 	afs_put_cell(cell);
240 
241 	_leave(" = %d (%p)", ret, volume);
242 	return ret;
243 
244  error_discard:
245 	up_write(&cell->vl_sem);
246 
247 	for (loop = volume->nservers - 1; loop >= 0; loop--)
248 		afs_put_server(volume->servers[loop]);
249 
250 	kfree(volume);
251 	goto error;
252 } /* end afs_volume_lookup() */
253 
254 /*****************************************************************************/
255 /*
256  * destroy a volume record
257  */
258 void afs_put_volume(struct afs_volume *volume)
259 {
260 	struct afs_vlocation *vlocation;
261 	int loop;
262 
263 	if (!volume)
264 		return;
265 
266 	_enter("%p", volume);
267 
268 	vlocation = volume->vlocation;
269 
270 	/* sanity check */
271 	BUG_ON(atomic_read(&volume->usage) <= 0);
272 
273 	/* to prevent a race, the decrement and the dequeue must be effectively
274 	 * atomic */
275 	down_write(&vlocation->cell->vl_sem);
276 
277 	if (likely(!atomic_dec_and_test(&volume->usage))) {
278 		up_write(&vlocation->cell->vl_sem);
279 		_leave("");
280 		return;
281 	}
282 
283 	vlocation->vols[volume->type] = NULL;
284 
285 	up_write(&vlocation->cell->vl_sem);
286 
287 	/* finish cleaning up the volume */
288 #ifdef AFS_CACHING_SUPPORT
289 	cachefs_relinquish_cookie(volume->cache, 0);
290 #endif
291 	afs_put_vlocation(vlocation);
292 
293 	for (loop = volume->nservers - 1; loop >= 0; loop--)
294 		afs_put_server(volume->servers[loop]);
295 
296 	kfree(volume);
297 
298 	_leave(" [destroyed]");
299 } /* end afs_put_volume() */
300 
301 /*****************************************************************************/
302 /*
303  * pick a server to use to try accessing this volume
304  * - returns with an elevated usage count on the server chosen
305  */
306 int afs_volume_pick_fileserver(struct afs_volume *volume,
307 			       struct afs_server **_server)
308 {
309 	struct afs_server *server;
310 	int ret, state, loop;
311 
312 	_enter("%s", volume->vlocation->vldb.name);
313 
314 	down_read(&volume->server_sem);
315 
316 	/* handle the no-server case */
317 	if (volume->nservers == 0) {
318 		ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
319 		up_read(&volume->server_sem);
320 		_leave(" = %d [no servers]", ret);
321 		return ret;
322 	}
323 
324 	/* basically, just search the list for the first live server and use
325 	 * that */
326 	ret = 0;
327 	for (loop = 0; loop < volume->nservers; loop++) {
328 		server = volume->servers[loop];
329 		state = server->fs_state;
330 
331 		switch (state) {
332 			/* found an apparently healthy server */
333 		case 0:
334 			afs_get_server(server);
335 			up_read(&volume->server_sem);
336 			*_server = server;
337 			_leave(" = 0 (picked %08x)",
338 			       ntohl(server->addr.s_addr));
339 			return 0;
340 
341 		case -ENETUNREACH:
342 			if (ret == 0)
343 				ret = state;
344 			break;
345 
346 		case -EHOSTUNREACH:
347 			if (ret == 0 ||
348 			    ret == -ENETUNREACH)
349 				ret = state;
350 			break;
351 
352 		case -ECONNREFUSED:
353 			if (ret == 0 ||
354 			    ret == -ENETUNREACH ||
355 			    ret == -EHOSTUNREACH)
356 				ret = state;
357 			break;
358 
359 		default:
360 		case -EREMOTEIO:
361 			if (ret == 0 ||
362 			    ret == -ENETUNREACH ||
363 			    ret == -EHOSTUNREACH ||
364 			    ret == -ECONNREFUSED)
365 				ret = state;
366 			break;
367 		}
368 	}
369 
370 	/* no available servers
371 	 * - TODO: handle the no active servers case better
372 	 */
373 	up_read(&volume->server_sem);
374 	_leave(" = %d", ret);
375 	return ret;
376 } /* end afs_volume_pick_fileserver() */
377 
378 /*****************************************************************************/
379 /*
380  * release a server after use
381  * - releases the ref on the server struct that was acquired by picking
382  * - records result of using a particular server to access a volume
383  * - return 0 to try again, 1 if okay or to issue error
384  */
385 int afs_volume_release_fileserver(struct afs_volume *volume,
386 				  struct afs_server *server,
387 				  int result)
388 {
389 	unsigned loop;
390 
391 	_enter("%s,%08x,%d",
392 	       volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
393 	       result);
394 
395 	switch (result) {
396 		/* success */
397 	case 0:
398 		server->fs_act_jif = jiffies;
399 		break;
400 
401 		/* the fileserver denied all knowledge of the volume */
402 	case -ENOMEDIUM:
403 		server->fs_act_jif = jiffies;
404 		down_write(&volume->server_sem);
405 
406 		/* first, find where the server is in the active list (if it
407 		 * is) */
408 		for (loop = 0; loop < volume->nservers; loop++)
409 			if (volume->servers[loop] == server)
410 				goto present;
411 
412 		/* no longer there - may have been discarded by another op */
413 		goto try_next_server_upw;
414 
415 	present:
416 		volume->nservers--;
417 		memmove(&volume->servers[loop],
418 			&volume->servers[loop + 1],
419 			sizeof(volume->servers[loop]) *
420 			(volume->nservers - loop));
421 		volume->servers[volume->nservers] = NULL;
422 		afs_put_server(server);
423 		volume->rjservers++;
424 
425 		if (volume->nservers > 0)
426 			/* another server might acknowledge its existence */
427 			goto try_next_server_upw;
428 
429 		/* handle the case where all the fileservers have rejected the
430 		 * volume
431 		 * - TODO: try asking the fileservers for volume information
432 		 * - TODO: contact the VL server again to see if the volume is
433 		 *         no longer registered
434 		 */
435 		up_write(&volume->server_sem);
436 		afs_put_server(server);
437 		_leave(" [completely rejected]");
438 		return 1;
439 
440 		/* problem reaching the server */
441 	case -ENETUNREACH:
442 	case -EHOSTUNREACH:
443 	case -ECONNREFUSED:
444 	case -ETIMEDOUT:
445 	case -EREMOTEIO:
446 		/* mark the server as dead
447 		 * TODO: vary dead timeout depending on error
448 		 */
449 		spin_lock(&server->fs_lock);
450 		if (!server->fs_state) {
451 			server->fs_dead_jif = jiffies + HZ * 10;
452 			server->fs_state = result;
453 			printk("kAFS: SERVER DEAD state=%d\n", result);
454 		}
455 		spin_unlock(&server->fs_lock);
456 		goto try_next_server;
457 
458 		/* miscellaneous error */
459 	default:
460 		server->fs_act_jif = jiffies;
461 	case -ENOMEM:
462 	case -ENONET:
463 		break;
464 	}
465 
466 	/* tell the caller to accept the result */
467 	afs_put_server(server);
468 	_leave("");
469 	return 1;
470 
471 	/* tell the caller to loop around and try the next server */
472  try_next_server_upw:
473 	up_write(&volume->server_sem);
474  try_next_server:
475 	afs_put_server(server);
476 	_leave(" [try next server]");
477 	return 0;
478 
479 } /* end afs_volume_release_fileserver() */
480 
/*****************************************************************************/
/*
 * decide whether a volume hash record found in the cache belongs to the
 * given volume
 * - the record matches if its volume type equals the volume's type
 */
#ifdef AFS_CACHING_SUPPORT
static cachefs_match_val_t afs_volume_cache_match(void *target,
						  const void *entry)
{
	const struct afs_cache_vhash *cached = entry;
	struct afs_volume *vol = target;

	_enter("{%u},{%u}", vol->type, cached->vtype);

	if (vol->type != cached->vtype) {
		_leave(" = FAILED");
		return CACHEFS_MATCH_FAILED;
	}

	_leave(" = SUCCESS");
	return CACHEFS_MATCH_SUCCESS;
} /* end afs_volume_cache_match() */
#endif
503 
/*****************************************************************************/
/*
 * refresh a volume hash record in the cache from the in-memory volume
 * - copies the volume's type into the record
 */
#ifdef AFS_CACHING_SUPPORT
static void afs_volume_cache_update(void *source, void *entry)
{
	struct afs_volume *vol = source;
	struct afs_cache_vhash *record = entry;

	_enter("");

	record->vtype = vol->type;

} /* end afs_volume_cache_update() */
#endif
520