xref: /linux/fs/afs/vl_rotate.c (revision bea00fab2b0e5359ee88a2b127f15a35cd48872b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Handle vlserver selection and rotation.
3  *
4  * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/kernel.h>
9 #include <linux/sched.h>
10 #include <linux/sched/signal.h>
11 #include "internal.h"
12 #include "afs_vl.h"
13 
14 /*
15  * Begin an operation on a volume location server.
16  */
17 bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
18 				  struct key *key)
19 {
20 	static atomic_t debug_ids;
21 
22 	memset(vc, 0, sizeof(*vc));
23 	vc->cell = cell;
24 	vc->key = key;
25 	vc->cumul_error.error = -EDESTADDRREQ;
26 	vc->nr_iterations = -1;
27 
28 	if (signal_pending(current)) {
29 		vc->cumul_error.error = -EINTR;
30 		vc->flags |= AFS_VL_CURSOR_STOP;
31 		return false;
32 	}
33 
34 	vc->debug_id = atomic_inc_return(&debug_ids);
35 	return true;
36 }
37 
38 /*
39  * Begin iteration through a server list, starting with the last used server if
40  * possible, or the last recorded good server if not.
41  */
42 static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
43 {
44 	struct afs_cell *cell = vc->cell;
45 	unsigned int dns_lookup_count;
46 
47 	if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
48 	    cell->dns_expiry <= ktime_get_real_seconds()) {
49 		dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
50 		set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
51 		afs_queue_cell(cell, afs_cell_trace_get_queue_dns);
52 
53 		if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
54 			if (wait_var_event_interruptible(
55 				    &cell->dns_lookup_count,
56 				    smp_load_acquire(&cell->dns_lookup_count)
57 				    != dns_lookup_count) < 0) {
58 				vc->cumul_error.error = -ERESTARTSYS;
59 				return false;
60 			}
61 		}
62 
63 		/* Status load is ordered after lookup counter load */
64 		if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
65 			pr_warn("No record of cell %s\n", cell->name);
66 			vc->cumul_error.error = -ENOENT;
67 			return false;
68 		}
69 
70 		if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
71 			vc->cumul_error.error = -EDESTADDRREQ;
72 			return false;
73 		}
74 	}
75 
76 	read_lock(&cell->vl_servers_lock);
77 	vc->server_list = afs_get_vlserverlist(
78 		rcu_dereference_protected(cell->vl_servers,
79 					  lockdep_is_held(&cell->vl_servers_lock)));
80 	read_unlock(&cell->vl_servers_lock);
81 	if (!vc->server_list->nr_servers)
82 		return false;
83 
84 	vc->untried_servers = (1UL << vc->server_list->nr_servers) - 1;
85 	vc->server_index = -1;
86 	return true;
87 }
88 
/*
 * Select the vlserver to use.  May be called multiple times to rotate
 * through the vlservers.
 *
 * On the first call (iteration 0) the server list is obtained and probes are
 * sent.  On every later call, the outcome of the previous call, as recorded in
 * vc->call_error and vc->call_abort_code, is evaluated first to decide whether
 * to stop, to try another address of the same server or to move on to another
 * server.
 *
 * Returns true if a server address has been selected, in which case
 * vc->server, vc->alist and vc->addr_index identify it.  Returns false when
 * there is nothing further to try; AFS_VL_CURSOR_STOP is then set in vc->flags
 * and vc->cumul_error.error holds the overall result.
 */
bool afs_select_vlserver(struct afs_vl_cursor *vc)
{
	struct afs_addr_list *alist = vc->alist;
	struct afs_vlserver *vlserver;
	unsigned long set, failed;
	unsigned int rtt;
	s32 abort_code = vc->call_abort_code;
	int error = vc->call_error, i;

	vc->nr_iterations++;

	_enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d",
	       vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers,
	       vc->addr_index, vc->addr_tried,
	       error, abort_code);

	if (vc->flags & AFS_VL_CURSOR_STOP) {
		_leave(" = f [stopped]");
		return false;
	}

	/* Nothing has been attempted yet, so there's no result to evaluate. */
	if (vc->nr_iterations == 0)
		goto start;

	/* Note the outcome against the address that was just used. */
	WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error);

	/* Evaluate the result of the previous operation, if there was one. */
	switch (error) {
	default:
	case 0:
		/* Success or local failure.  Stop. */
		vc->cumul_error.error = error;
		vc->flags |= AFS_VL_CURSOR_STOP;
		_leave(" = f [okay/local %d]", vc->cumul_error.error);
		return false;

	case -ECONNABORTED:
		/* The far side rejected the operation on some grounds.  This
		 * might involve the server being busy or the volume having been moved.
		 */
		switch (abort_code) {
		case AFSVL_IO:
		case AFSVL_BADVOLOPER:
		case AFSVL_NOMEM:
			/* The server went weird. */
			afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code);
			//write_lock(&vc->cell->vl_servers_lock);
			//vc->server_list->weird_mask |= 1 << vc->server_index;
			//write_unlock(&vc->cell->vl_servers_lock);
			goto next_server;

		default:
			afs_prioritise_error(&vc->cumul_error, error, abort_code);
			goto failed;
		}

	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
		/* Couldn't reach that address; try another one on this server. */
		_debug("no conn %d", error);
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto iterate_address;

	case -ECONNRESET:
		/* Move on, but flag that a second pass is worth making if the
		 * remaining servers don't help either.
		 */
		_debug("call reset");
		afs_prioritise_error(&vc->cumul_error, error, 0);
		vc->flags |= AFS_VL_CURSOR_RETRY;
		goto next_server;

	case -EOPNOTSUPP:
		_debug("notsupp");
		goto next_server;
	}

restart_from_beginning:
	_debug("restart");
	/* We only get here from no_more_servers, by which point the address
	 * list has already been dropped, so alist must be checked before it
	 * is dereferenced even if the last call got a response.
	 */
	if (alist &&
	    vc->call_responded &&
	    vc->addr_index != alist->preferred &&
	    test_bit(alist->preferred, &vc->addr_tried))
		WRITE_ONCE(alist->preferred, vc->addr_index);
	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart);
	alist = vc->alist = NULL;

	afs_put_vlserverlist(vc->cell->net, vc->server_list);
	vc->server_list = NULL;
	/* Only one restart is permitted per operation. */
	if (vc->flags & AFS_VL_CURSOR_RETRIED)
		goto failed;
	vc->flags |= AFS_VL_CURSOR_RETRIED;
start:
	_debug("start");
	ASSERTCMP(alist, ==, NULL);

	if (!afs_start_vl_iteration(vc))
		goto failed;

	error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
	if (error < 0) {
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto failed;
	}

pick_server:
	_debug("pick [%lx]", vc->untried_servers);
	ASSERTCMP(alist, ==, NULL);

	error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers);
	if (error < 0) {
		afs_prioritise_error(&vc->cumul_error, error, 0);
		goto failed;
	}

	/* Pick the untried server with the lowest RTT. */
	vc->server_index = vc->server_list->preferred;
	if (test_bit(vc->server_index, &vc->untried_servers))
		goto selected_server;

	/* The list's preferred server has already been tried, so scan the
	 * untried servers that are marked as responding.
	 */
	vc->server_index = -1;
	rtt = UINT_MAX;
	for (i = 0; i < vc->server_list->nr_servers; i++) {
		struct afs_vlserver *s = vc->server_list->servers[i].server;

		if (!test_bit(i, &vc->untried_servers) ||
		    !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
			continue;
		if (s->probe.rtt <= rtt) {
			vc->server_index = i;
			rtt = s->probe.rtt;
		}
	}

	if (vc->server_index == -1)
		goto no_more_servers;

selected_server:
	_debug("use %d", vc->server_index);
	__clear_bit(vc->server_index, &vc->untried_servers);

	/* We're starting on a different vlserver from the list.  We need to
	 * check it, find its address list and probe its capabilities before we
	 * use it.
	 */
	vlserver = vc->server_list->servers[vc->server_index].server;
	vc->server = vlserver;

	_debug("USING VLSERVER: %s", vlserver->name);

	/* Take our own ref on the server's current address list. */
	read_lock(&vlserver->lock);
	alist = rcu_dereference_protected(vlserver->addresses,
					  lockdep_is_held(&vlserver->lock));
	vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set);
	read_unlock(&vlserver->lock);

	vc->addr_tried = 0;
	vc->addr_index = -1;

iterate_address:
	/* Iterate over the current server's address list to try and find an
	 * address on which it will respond to us.
	 */
	set = READ_ONCE(alist->responded);
	failed = READ_ONCE(alist->probe_failed);
	vc->addr_index = READ_ONCE(alist->preferred);

	_debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index);

	/* Candidates are addresses that responded to a probe and that have
	 * neither failed a probe nor been tried already in this pass.
	 */
	set &= ~(failed | vc->addr_tried);

	if (!set)
		goto next_server;

	/* Use the preferred address if it's a candidate, else the lowest
	 * numbered candidate.
	 */
	if (!test_bit(vc->addr_index, &set))
		vc->addr_index = __ffs(set);

	set_bit(vc->addr_index, &vc->addr_tried);
	vc->alist = alist;

	_debug("VL address %d/%d", vc->addr_index, alist->nr_addrs);

	vc->call_responded = false;
	_leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer));
	return true;

next_server:
	_debug("next");
	ASSERT(alist);
	/* If the last address used got a response and the preferred address
	 * had been tried as well, switch the preference to the one that
	 * worked.
	 */
	if (vc->call_responded &&
	    vc->addr_index != alist->preferred &&
	    test_bit(alist->preferred, &vc->addr_tried))
		WRITE_ONCE(alist->preferred, vc->addr_index);
	afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next);
	alist = vc->alist = NULL;
	goto pick_server;

no_more_servers:
	/* That's all the servers poked to no good effect.  Try again if some
	 * of them were busy.
	 */
	if (vc->flags & AFS_VL_CURSOR_RETRY)
		goto restart_from_beginning;

	/* Fold each server's probe result into the cumulative error. */
	for (i = 0; i < vc->server_list->nr_servers; i++) {
		struct afs_vlserver *s = vc->server_list->servers[i].server;

		if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags))
			vc->cumul_error.responded = true;
		afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error),
				     s->probe.abort_code);
	}

failed:
	if (alist) {
		if (vc->call_responded &&
		    vc->addr_index != alist->preferred &&
		    test_bit(alist->preferred, &vc->addr_tried))
			WRITE_ONCE(alist->preferred, vc->addr_index);
		afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail);
		alist = vc->alist = NULL;
	}
	vc->flags |= AFS_VL_CURSOR_STOP;
	_leave(" = f [failed %d]", vc->cumul_error.error);
	return false;
}
320 
321 /*
322  * Dump cursor state in the case of the error being EDESTADDRREQ.
323  */
324 static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
325 {
326 	struct afs_cell *cell = vc->cell;
327 	static int count;
328 	int i;
329 
330 	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
331 		return;
332 	count++;
333 
334 	rcu_read_lock();
335 	pr_notice("EDESTADDR occurred\n");
336 	pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
337 	pr_notice("DNS: src=%u st=%u lc=%x\n",
338 		  cell->dns_source, cell->dns_status, cell->dns_lookup_count);
339 	pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
340 		  vc->untried_servers, vc->server_index, vc->nr_iterations,
341 		  vc->flags, vc->cumul_error.error);
342 	pr_notice("VC: call  er=%d ac=%d r=%u\n",
343 		  vc->call_error, vc->call_abort_code, vc->call_responded);
344 
345 	if (vc->server_list) {
346 		const struct afs_vlserver_list *sl = vc->server_list;
347 		pr_notice("VC: SL nr=%u ix=%u\n",
348 			  sl->nr_servers, sl->index);
349 		for (i = 0; i < sl->nr_servers; i++) {
350 			const struct afs_vlserver *s = sl->servers[i].server;
351 			pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
352 				  s->name, s->port, s->flags, s->probe.error);
353 			if (s->addresses) {
354 				const struct afs_addr_list *a =
355 					rcu_dereference(s->addresses);
356 				pr_notice("VC:  - nr=%u/%u/%u pf=%u\n",
357 					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
358 					  a->preferred);
359 				pr_notice("VC:  - R=%lx F=%lx\n",
360 					  a->responded, a->probe_failed);
361 				if (a == vc->alist)
362 					pr_notice("VC:  - current\n");
363 			}
364 		}
365 	}
366 
367 	pr_notice("AC: t=%lx ax=%u\n", vc->addr_tried, vc->addr_index);
368 	rcu_read_unlock();
369 }
370 
371 /*
372  * Tidy up a volume location server cursor and unlock the vnode.
373  */
374 int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
375 {
376 	struct afs_net *net = vc->cell->net;
377 
378 	_enter("VC=%x+%x", vc->debug_id, vc->nr_iterations);
379 
380 	switch (vc->cumul_error.error) {
381 	case -EDESTADDRREQ:
382 	case -EADDRNOTAVAIL:
383 	case -ENETUNREACH:
384 	case -EHOSTUNREACH:
385 		afs_vl_dump_edestaddrreq(vc);
386 		break;
387 	}
388 
389 	if (vc->alist) {
390 		if (vc->call_responded &&
391 		    vc->addr_index != vc->alist->preferred &&
392 		    test_bit(vc->alist->preferred, &vc->addr_tried))
393 			WRITE_ONCE(vc->alist->preferred, vc->addr_index);
394 		afs_put_addrlist(vc->alist, afs_alist_trace_put_vlrotate_end);
395 		vc->alist = NULL;
396 	}
397 	afs_put_vlserverlist(net, vc->server_list);
398 	return vc->cumul_error.error;
399 }
400