1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Handle vlserver selection and rotation. 3 * 4 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/kernel.h> 9 #include <linux/sched.h> 10 #include <linux/sched/signal.h> 11 #include "internal.h" 12 #include "afs_vl.h" 13 14 /* 15 * Begin an operation on a volume location server. 16 */ 17 bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell, 18 struct key *key) 19 { 20 static atomic_t debug_ids; 21 22 memset(vc, 0, sizeof(*vc)); 23 vc->cell = cell; 24 vc->key = key; 25 vc->cumul_error.error = -EDESTADDRREQ; 26 vc->nr_iterations = -1; 27 28 if (signal_pending(current)) { 29 vc->cumul_error.error = -EINTR; 30 vc->flags |= AFS_VL_CURSOR_STOP; 31 return false; 32 } 33 34 vc->debug_id = atomic_inc_return(&debug_ids); 35 return true; 36 } 37 38 /* 39 * Begin iteration through a server list, starting with the last used server if 40 * possible, or the last recorded good server if not. 41 */ 42 static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) 43 { 44 struct afs_cell *cell = vc->cell; 45 unsigned int dns_lookup_count; 46 47 if (cell->dns_source == DNS_RECORD_UNAVAILABLE || 48 cell->dns_expiry <= ktime_get_real_seconds()) { 49 dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); 50 set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); 51 afs_queue_cell(cell, afs_cell_trace_get_queue_dns); 52 53 if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { 54 if (wait_var_event_interruptible( 55 &cell->dns_lookup_count, 56 smp_load_acquire(&cell->dns_lookup_count) 57 != dns_lookup_count) < 0) { 58 vc->cumul_error.error = -ERESTARTSYS; 59 return false; 60 } 61 } 62 63 /* Status load is ordered after lookup counter load */ 64 if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) { 65 pr_warn("No record of cell %s\n", cell->name); 66 vc->cumul_error.error = -ENOENT; 67 return false; 68 } 69 70 if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { 71 vc->cumul_error.error = -EDESTADDRREQ; 72 return false; 73 } 74 } 75 76 read_lock(&cell->vl_servers_lock); 77 vc->server_list = afs_get_vlserverlist( 78 rcu_dereference_protected(cell->vl_servers, 79 lockdep_is_held(&cell->vl_servers_lock))); 80 read_unlock(&cell->vl_servers_lock); 81 if (!vc->server_list->nr_servers) 82 return false; 83 84 vc->untried_servers = (1UL << vc->server_list->nr_servers) - 1; 85 vc->server_index = -1; 86 return true; 87 } 88 89 /* 90 * Select the vlserver to use. May be called multiple times to rotate 91 * through the vlservers. 92 */ 93 bool afs_select_vlserver(struct afs_vl_cursor *vc) 94 { 95 struct afs_addr_list *alist = vc->alist; 96 struct afs_vlserver *vlserver; 97 unsigned long set, failed; 98 unsigned int rtt; 99 s32 abort_code = vc->call_abort_code; 100 int error = vc->call_error, i; 101 102 vc->nr_iterations++; 103 104 _enter("VC=%x+%x,%d{%lx},%d{%lx},%d,%d", 105 vc->debug_id, vc->nr_iterations, vc->server_index, vc->untried_servers, 106 vc->addr_index, vc->addr_tried, 107 error, abort_code); 108 109 if (vc->flags & AFS_VL_CURSOR_STOP) { 110 _leave(" = f [stopped]"); 111 return false; 112 } 113 114 if (vc->nr_iterations == 0) 115 goto start; 116 117 WRITE_ONCE(alist->addrs[vc->addr_index].last_error, error); 118 119 /* Evaluate the result of the previous operation, if there was one. */ 120 switch (error) { 121 default: 122 case 0: 123 /* Success or local failure. Stop. */ 124 vc->cumul_error.error = error; 125 vc->flags |= AFS_VL_CURSOR_STOP; 126 _leave(" = f [okay/local %d]", vc->cumul_error.error); 127 return false; 128 129 case -ECONNABORTED: 130 /* The far side rejected the operation on some grounds. This 131 * might involve the server being busy or the volume having been moved. 132 */ 133 switch (abort_code) { 134 case AFSVL_IO: 135 case AFSVL_BADVOLOPER: 136 case AFSVL_NOMEM: 137 /* The server went weird. */ 138 afs_prioritise_error(&vc->cumul_error, -EREMOTEIO, abort_code); 139 //write_lock(&vc->cell->vl_servers_lock); 140 //vc->server_list->weird_mask |= 1 << vc->server_index; 141 //write_unlock(&vc->cell->vl_servers_lock); 142 goto next_server; 143 144 default: 145 afs_prioritise_error(&vc->cumul_error, error, abort_code); 146 goto failed; 147 } 148 149 case -ERFKILL: 150 case -EADDRNOTAVAIL: 151 case -ENETUNREACH: 152 case -EHOSTUNREACH: 153 case -EHOSTDOWN: 154 case -ECONNREFUSED: 155 case -ETIMEDOUT: 156 case -ETIME: 157 _debug("no conn %d", error); 158 afs_prioritise_error(&vc->cumul_error, error, 0); 159 goto iterate_address; 160 161 case -ECONNRESET: 162 _debug("call reset"); 163 afs_prioritise_error(&vc->cumul_error, error, 0); 164 vc->flags |= AFS_VL_CURSOR_RETRY; 165 goto next_server; 166 167 case -EOPNOTSUPP: 168 _debug("notsupp"); 169 goto next_server; 170 } 171 172 restart_from_beginning: 173 _debug("restart"); 174 if (vc->call_responded && 175 vc->addr_index != vc->alist->preferred && 176 test_bit(alist->preferred, &vc->addr_tried)) 177 WRITE_ONCE(alist->preferred, vc->addr_index); 178 afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_restart); 179 alist = vc->alist = NULL; 180 181 afs_put_vlserverlist(vc->cell->net, vc->server_list); 182 vc->server_list = NULL; 183 if (vc->flags & AFS_VL_CURSOR_RETRIED) 184 goto failed; 185 vc->flags |= AFS_VL_CURSOR_RETRIED; 186 start: 187 _debug("start"); 188 ASSERTCMP(alist, ==, NULL); 189 190 if (!afs_start_vl_iteration(vc)) 191 goto failed; 192 193 error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list); 194 if (error < 0) { 195 afs_prioritise_error(&vc->cumul_error, error, 0); 196 goto failed; 197 } 198 199 pick_server: 200 _debug("pick [%lx]", vc->untried_servers); 201 ASSERTCMP(alist, ==, NULL); 202 203 error = afs_wait_for_vl_probes(vc->server_list, vc->untried_servers); 204 if (error < 0) { 205 afs_prioritise_error(&vc->cumul_error, error, 0); 206 goto failed; 207 } 208 209 /* Pick the untried server with the lowest RTT. */ 210 vc->server_index = vc->server_list->preferred; 211 if (test_bit(vc->server_index, &vc->untried_servers)) 212 goto selected_server; 213 214 vc->server_index = -1; 215 rtt = UINT_MAX; 216 for (i = 0; i < vc->server_list->nr_servers; i++) { 217 struct afs_vlserver *s = vc->server_list->servers[i].server; 218 219 if (!test_bit(i, &vc->untried_servers) || 220 !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) 221 continue; 222 if (s->probe.rtt <= rtt) { 223 vc->server_index = i; 224 rtt = s->probe.rtt; 225 } 226 } 227 228 if (vc->server_index == -1) 229 goto no_more_servers; 230 231 selected_server: 232 _debug("use %d", vc->server_index); 233 __clear_bit(vc->server_index, &vc->untried_servers); 234 235 /* We're starting on a different vlserver from the list. We need to 236 * check it, find its address list and probe its capabilities before we 237 * use it. 238 */ 239 vlserver = vc->server_list->servers[vc->server_index].server; 240 vc->server = vlserver; 241 242 _debug("USING VLSERVER: %s", vlserver->name); 243 244 read_lock(&vlserver->lock); 245 alist = rcu_dereference_protected(vlserver->addresses, 246 lockdep_is_held(&vlserver->lock)); 247 vc->alist = afs_get_addrlist(alist, afs_alist_trace_get_vlrotate_set); 248 read_unlock(&vlserver->lock); 249 250 vc->addr_tried = 0; 251 vc->addr_index = -1; 252 253 iterate_address: 254 /* Iterate over the current server's address list to try and find an 255 * address on which it will respond to us. 256 */ 257 set = READ_ONCE(alist->responded); 258 failed = READ_ONCE(alist->probe_failed); 259 vc->addr_index = READ_ONCE(alist->preferred); 260 261 _debug("%lx-%lx-%lx,%d", set, failed, vc->addr_tried, vc->addr_index); 262 263 set &= ~(failed | vc->addr_tried); 264 265 if (!set) 266 goto next_server; 267 268 if (!test_bit(vc->addr_index, &set)) 269 vc->addr_index = __ffs(set); 270 271 set_bit(vc->addr_index, &vc->addr_tried); 272 vc->alist = alist; 273 274 _debug("VL address %d/%d", vc->addr_index, alist->nr_addrs); 275 276 vc->call_responded = false; 277 _leave(" = t %pISpc", rxrpc_kernel_remote_addr(alist->addrs[vc->addr_index].peer)); 278 return true; 279 280 next_server: 281 _debug("next"); 282 ASSERT(alist); 283 if (vc->call_responded && 284 vc->addr_index != alist->preferred && 285 test_bit(alist->preferred, &vc->addr_tried)) 286 WRITE_ONCE(alist->preferred, vc->addr_index); 287 afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_next); 288 alist = vc->alist = NULL; 289 goto pick_server; 290 291 no_more_servers: 292 /* That's all the servers poked to no good effect. Try again if some 293 * of them were busy. 294 */ 295 if (vc->flags & AFS_VL_CURSOR_RETRY) 296 goto restart_from_beginning; 297 298 for (i = 0; i < vc->server_list->nr_servers; i++) { 299 struct afs_vlserver *s = vc->server_list->servers[i].server; 300 301 if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) 302 vc->cumul_error.responded = true; 303 afs_prioritise_error(&vc->cumul_error, READ_ONCE(s->probe.error), 304 s->probe.abort_code); 305 } 306 307 failed: 308 if (alist) { 309 if (vc->call_responded && 310 vc->addr_index != alist->preferred && 311 test_bit(alist->preferred, &vc->addr_tried)) 312 WRITE_ONCE(alist->preferred, vc->addr_index); 313 afs_put_addrlist(alist, afs_alist_trace_put_vlrotate_fail); 314 alist = vc->alist = NULL; 315 } 316 vc->flags |= AFS_VL_CURSOR_STOP; 317 _leave(" = f [failed %d]", vc->cumul_error.error); 318 return false; 319 } 320 321 /* 322 * Dump cursor state in the case of the error being EDESTADDRREQ. 323 */ 324 static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) 325 { 326 struct afs_cell *cell = vc->cell; 327 static int count; 328 int i; 329 330 if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3) 331 return; 332 count++; 333 334 rcu_read_lock(); 335 pr_notice("EDESTADDR occurred\n"); 336 pr_notice("CELL: %s err=%d\n", cell->name, cell->error); 337 pr_notice("DNS: src=%u st=%u lc=%x\n", 338 cell->dns_source, cell->dns_status, cell->dns_lookup_count); 339 pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", 340 vc->untried_servers, vc->server_index, vc->nr_iterations, 341 vc->flags, vc->cumul_error.error); 342 pr_notice("VC: call er=%d ac=%d r=%u\n", 343 vc->call_error, vc->call_abort_code, vc->call_responded); 344 345 if (vc->server_list) { 346 const struct afs_vlserver_list *sl = vc->server_list; 347 pr_notice("VC: SL nr=%u ix=%u\n", 348 sl->nr_servers, sl->index); 349 for (i = 0; i < sl->nr_servers; i++) { 350 const struct afs_vlserver *s = sl->servers[i].server; 351 pr_notice("VC: server %s+%hu fl=%lx E=%hd\n", 352 s->name, s->port, s->flags, s->probe.error); 353 if (s->addresses) { 354 const struct afs_addr_list *a = 355 rcu_dereference(s->addresses); 356 pr_notice("VC: - nr=%u/%u/%u pf=%u\n", 357 a->nr_ipv4, a->nr_addrs, a->max_addrs, 358 a->preferred); 359 pr_notice("VC: - R=%lx F=%lx\n", 360 a->responded, a->probe_failed); 361 if (a == vc->alist) 362 pr_notice("VC: - current\n"); 363 } 364 } 365 } 366 367 pr_notice("AC: t=%lx ax=%u\n", vc->addr_tried, vc->addr_index); 368 rcu_read_unlock(); 369 } 370 371 /* 372 * Tidy up a volume location server cursor and unlock the vnode. 373 */ 374 int afs_end_vlserver_operation(struct afs_vl_cursor *vc) 375 { 376 struct afs_net *net = vc->cell->net; 377 378 _enter("VC=%x+%x", vc->debug_id, vc->nr_iterations); 379 380 switch (vc->cumul_error.error) { 381 case -EDESTADDRREQ: 382 case -EADDRNOTAVAIL: 383 case -ENETUNREACH: 384 case -EHOSTUNREACH: 385 afs_vl_dump_edestaddrreq(vc); 386 break; 387 } 388 389 if (vc->alist) { 390 if (vc->call_responded && 391 vc->addr_index != vc->alist->preferred && 392 test_bit(vc->alist->preferred, &vc->addr_tried)) 393 WRITE_ONCE(vc->alist->preferred, vc->addr_index); 394 afs_put_addrlist(vc->alist, afs_alist_trace_put_vlrotate_end); 395 vc->alist = NULL; 396 } 397 afs_put_vlserverlist(net, vc->server_list); 398 return vc->cumul_error.error; 399 } 400