1 /* Handle vlserver selection and rotation. 2 * 3 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public Licence 8 * as published by the Free Software Foundation; either version 9 * 2 of the Licence, or (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/sched.h> 14 #include <linux/sched/signal.h> 15 #include "internal.h" 16 #include "afs_vl.h" 17 18 /* 19 * Begin an operation on a volume location server. 20 */ 21 bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, 22 struct key *key) 23 { 24 memset(vc, 0, sizeof(*vc)); 25 vc->cell = cell; 26 vc->key = key; 27 vc->error = -EDESTADDRREQ; 28 vc->ac.error = SHRT_MAX; 29 30 if (signal_pending(current)) { 31 vc->error = -EINTR; 32 vc->flags |= AFS_VL_CURSOR_STOP; 33 return false; 34 } 35 36 return true; 37 } 38 39 /* 40 * Begin iteration through a server list, starting with the last used server if 41 * possible, or the last recorded good server if not. 42 */ 43 static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) 44 { 45 struct afs_cell *cell = vc->cell; 46 47 if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET, 48 TASK_INTERRUPTIBLE)) { 49 vc->error = -ERESTARTSYS; 50 return false; 51 } 52 53 read_lock(&cell->vl_servers_lock); 54 vc->server_list = afs_get_vlserverlist( 55 rcu_dereference_protected(cell->vl_servers, 56 lockdep_is_held(&cell->vl_servers_lock))); 57 read_unlock(&cell->vl_servers_lock); 58 if (!vc->server_list || !vc->server_list->nr_servers) 59 return false; 60 61 vc->untried = (1UL << vc->server_list->nr_servers) - 1; 62 vc->index = -1; 63 return true; 64 } 65 66 /* 67 * Select the vlserver to use. May be called multiple times to rotate 68 * through the vlservers. 69 */ 70 bool afs_select_vlserver(struct afs_vl_cursor *vc) 71 { 72 struct afs_addr_list *alist; 73 struct afs_vlserver *vlserver; 74 struct afs_error e; 75 u32 rtt; 76 int error = vc->ac.error, i; 77 78 _enter("%lx[%d],%lx[%d],%d,%d", 79 vc->untried, vc->index, 80 vc->ac.tried, vc->ac.index, 81 error, vc->ac.abort_code); 82 83 if (vc->flags & AFS_VL_CURSOR_STOP) { 84 _leave(" = f [stopped]"); 85 return false; 86 } 87 88 vc->nr_iterations++; 89 90 /* Evaluate the result of the previous operation, if there was one. */ 91 switch (error) { 92 case SHRT_MAX: 93 goto start; 94 95 default: 96 case 0: 97 /* Success or local failure. Stop. */ 98 vc->error = error; 99 vc->flags |= AFS_VL_CURSOR_STOP; 100 _leave(" = f [okay/local %d]", vc->ac.error); 101 return false; 102 103 case -ECONNABORTED: 104 /* The far side rejected the operation on some grounds. This 105 * might involve the server being busy or the volume having been moved. 106 */ 107 switch (vc->ac.abort_code) { 108 case AFSVL_IO: 109 case AFSVL_BADVOLOPER: 110 case AFSVL_NOMEM: 111 /* The server went weird. */ 112 vc->error = -EREMOTEIO; 113 //write_lock(&vc->cell->vl_servers_lock); 114 //vc->server_list->weird_mask |= 1 << vc->index; 115 //write_unlock(&vc->cell->vl_servers_lock); 116 goto next_server; 117 118 default: 119 vc->error = afs_abort_to_error(vc->ac.abort_code); 120 goto failed; 121 } 122 123 case -ERFKILL: 124 case -EADDRNOTAVAIL: 125 case -ENETUNREACH: 126 case -EHOSTUNREACH: 127 case -EHOSTDOWN: 128 case -ECONNREFUSED: 129 case -ETIMEDOUT: 130 case -ETIME: 131 _debug("no conn %d", error); 132 vc->error = error; 133 goto iterate_address; 134 135 case -ECONNRESET: 136 _debug("call reset"); 137 vc->error = error; 138 vc->flags |= AFS_VL_CURSOR_RETRY; 139 goto next_server; 140 } 141 142 restart_from_beginning: 143 _debug("restart"); 144 afs_end_cursor(&vc->ac); 145 afs_put_vlserverlist(vc->cell->net, vc->server_list); 146 vc->server_list = NULL; 147 if (vc->flags & AFS_VL_CURSOR_RETRIED) 148 goto failed; 149 vc->flags |= AFS_VL_CURSOR_RETRIED; 150 start: 151 _debug("start"); 152 153 if (!afs_start_vl_iteration(vc)) 154 goto failed; 155 156 error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); 157 if (error < 0) 158 goto failed_set_error; 159 160 pick_server: 161 _debug("pick [%lx]", vc->untried); 162 163 error = afs_wait_for_vl_probes(vc->server_list, vc->untried); 164 if (error < 0) 165 goto failed_set_error; 166 167 /* Pick the untried server with the lowest RTT. */ 168 vc->index = vc->server_list->preferred; 169 if (test_bit(vc->index, &vc->untried)) 170 goto selected_server; 171 172 vc->index = -1; 173 rtt = U32_MAX; 174 for (i = 0; i < vc->server_list->nr_servers; i++) { 175 struct afs_vlserver *s = vc->server_list->servers[i].server; 176 177 if (!test_bit(i, &vc->untried) || !s->probe.responded) 178 continue; 179 if (s->probe.rtt < rtt) { 180 vc->index = i; 181 rtt = s->probe.rtt; 182 } 183 } 184 185 if (vc->index == -1) 186 goto no_more_servers; 187 188 selected_server: 189 _debug("use %d", vc->index); 190 __clear_bit(vc->index, &vc->untried); 191 192 /* We're starting on a different vlserver from the list. We need to 193 * check it, find its address list and probe its capabilities before we 194 * use it. 195 */ 196 ASSERTCMP(vc->ac.alist, ==, NULL); 197 vlserver = vc->server_list->servers[vc->index].server; 198 vc->server = vlserver; 199 200 _debug("USING VLSERVER: %s", vlserver->name); 201 202 read_lock(&vlserver->lock); 203 alist = rcu_dereference_protected(vlserver->addresses, 204 lockdep_is_held(&vlserver->lock)); 205 afs_get_addrlist(alist); 206 read_unlock(&vlserver->lock); 207 208 memset(&vc->ac, 0, sizeof(vc->ac)); 209 210 if (!vc->ac.alist) 211 vc->ac.alist = alist; 212 else 213 afs_put_addrlist(alist); 214 215 vc->ac.index = -1; 216 217 iterate_address: 218 ASSERT(vc->ac.alist); 219 /* Iterate over the current server's address list to try and find an 220 * address on which it will respond to us. 221 */ 222 if (!afs_iterate_addresses(&vc->ac)) 223 goto next_server; 224 225 _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs); 226 227 _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport); 228 return true; 229 230 next_server: 231 _debug("next"); 232 afs_end_cursor(&vc->ac); 233 goto pick_server; 234 235 no_more_servers: 236 /* That's all the servers poked to no good effect. Try again if some 237 * of them were busy. 238 */ 239 if (vc->flags & AFS_VL_CURSOR_RETRY) 240 goto restart_from_beginning; 241 242 e.error = -EDESTADDRREQ; 243 e.responded = false; 244 for (i = 0; i < vc->server_list->nr_servers; i++) { 245 struct afs_vlserver *s = vc->server_list->servers[i].server; 246 247 afs_prioritise_error(&e, READ_ONCE(s->probe.error), 248 s->probe.abort_code); 249 } 250 251 failed_set_error: 252 vc->error = error; 253 failed: 254 vc->flags |= AFS_VL_CURSOR_STOP; 255 afs_end_cursor(&vc->ac); 256 _leave(" = f [failed %d]", vc->error); 257 return false; 258 } 259 260 /* 261 * Dump cursor state in the case of the error being EDESTADDRREQ. 262 */ 263 static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) 264 { 265 static int count; 266 int i; 267 268 if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) 269 return; 270 count++; 271 272 rcu_read_lock(); 273 pr_notice("EDESTADDR occurred\n"); 274 pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", 275 vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); 276 277 if (vc->server_list) { 278 const struct afs_vlserver_list *sl = vc->server_list; 279 pr_notice("VC: SL nr=%u ix=%u\n", 280 sl->nr_servers, sl->index); 281 for (i = 0; i < sl->nr_servers; i++) { 282 const struct afs_vlserver *s = sl->servers[i].server; 283 pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", 284 s->name, s->port, s->flags, s->probe.error); 285 if (s->addresses) { 286 const struct afs_addr_list *a = 287 rcu_dereference(s->addresses); 288 pr_notice("VC: - nr=%u/%u/%u pf=%u\n", 289 a->nr_ipv4, a->nr_addrs, a->max_addrs, 290 a->preferred); 291 pr_notice("VC: - pr=%lx R=%lx F=%lx\n", 292 a->probed, a->responded, a->failed); 293 if (a == vc->ac.alist) 294 pr_notice("VC: - current\n"); 295 } 296 } 297 } 298 299 pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", 300 vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error, 301 vc->ac.responded, vc->ac.nr_iterations); 302 rcu_read_unlock(); 303 } 304 305 /* 306 * Tidy up a volume location server cursor and unlock the vnode. 307 */ 308 int afs_end_vlserver_operation(struct afs_vl_cursor *vc) 309 { 310 struct afs_net *net = vc->cell->net; 311 312 if (vc->error == -EDESTADDRREQ || 313 vc->error == -EADDRNOTAVAIL || 314 vc->error == -ENETUNREACH || 315 vc->error == -EHOSTUNREACH) 316 afs_vl_dump_edestaddrreq(vc); 317 318 afs_end_cursor(&vc->ac); 319 afs_put_vlserverlist(net, vc->server_list); 320 321 if (vc->error == -ECONNABORTED) 322 vc->error = afs_abort_to_error(vc->ac.abort_code); 323 324 return vc->error; 325 } 326