1 /* AFS server record management 2 * 3 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #include <linux/sched.h> 13 #include <linux/slab.h> 14 #include "afs_fs.h" 15 #include "internal.h" 16 17 static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ 18 static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ 19 20 static void afs_inc_servers_outstanding(struct afs_net *net) 21 { 22 atomic_inc(&net->servers_outstanding); 23 } 24 25 static void afs_dec_servers_outstanding(struct afs_net *net) 26 { 27 if (atomic_dec_and_test(&net->servers_outstanding)) 28 wake_up_var(&net->servers_outstanding); 29 } 30 31 /* 32 * Find a server by one of its addresses. 
33 */ 34 struct afs_server *afs_find_server(struct afs_net *net, 35 const struct sockaddr_rxrpc *srx) 36 { 37 const struct sockaddr_in6 *a = &srx->transport.sin6, *b; 38 const struct afs_addr_list *alist; 39 struct afs_server *server = NULL; 40 unsigned int i; 41 bool ipv6 = true; 42 int seq = 0, diff; 43 44 if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 || 45 srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 || 46 srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff)) 47 ipv6 = false; 48 49 rcu_read_lock(); 50 51 do { 52 if (server) 53 afs_put_server(net, server); 54 server = NULL; 55 read_seqbegin_or_lock(&net->fs_addr_lock, &seq); 56 57 if (ipv6) { 58 hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { 59 alist = rcu_dereference(server->addresses); 60 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { 61 b = &alist->addrs[i].transport.sin6; 62 diff = ((u16 __force)a->sin6_port - 63 (u16 __force)b->sin6_port); 64 if (diff == 0) 65 diff = memcmp(&a->sin6_addr, 66 &b->sin6_addr, 67 sizeof(struct in6_addr)); 68 if (diff == 0) 69 goto found; 70 if (diff < 0) { 71 // TODO: Sort the list 72 //if (i == alist->nr_ipv4) 73 // goto not_found; 74 break; 75 } 76 } 77 } 78 } else { 79 hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) { 80 alist = rcu_dereference(server->addresses); 81 for (i = 0; i < alist->nr_ipv4; i++) { 82 b = &alist->addrs[i].transport.sin6; 83 diff = ((u16 __force)a->sin6_port - 84 (u16 __force)b->sin6_port); 85 if (diff == 0) 86 diff = ((u32 __force)a->sin6_addr.s6_addr32[3] - 87 (u32 __force)b->sin6_addr.s6_addr32[3]); 88 if (diff == 0) 89 goto found; 90 if (diff < 0) { 91 // TODO: Sort the list 92 //if (i == 0) 93 // goto not_found; 94 break; 95 } 96 } 97 } 98 } 99 100 //not_found: 101 server = NULL; 102 found: 103 if (server && !atomic_inc_not_zero(&server->usage)) 104 server = NULL; 105 106 } while (need_seqretry(&net->fs_addr_lock, seq)); 107 108 done_seqretry(&net->fs_addr_lock, seq); 109 110 
rcu_read_unlock(); 111 return server; 112 } 113 114 /* 115 * Look up a server by its UUID 116 */ 117 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid) 118 { 119 struct afs_server *server = NULL; 120 struct rb_node *p; 121 int diff, seq = 0; 122 123 _enter("%pU", uuid); 124 125 do { 126 /* Unfortunately, rbtree walking doesn't give reliable results 127 * under just the RCU read lock, so we have to check for 128 * changes. 129 */ 130 if (server) 131 afs_put_server(net, server); 132 server = NULL; 133 134 read_seqbegin_or_lock(&net->fs_lock, &seq); 135 136 p = net->fs_servers.rb_node; 137 while (p) { 138 server = rb_entry(p, struct afs_server, uuid_rb); 139 140 diff = memcmp(uuid, &server->uuid, sizeof(*uuid)); 141 if (diff < 0) { 142 p = p->rb_left; 143 } else if (diff > 0) { 144 p = p->rb_right; 145 } else { 146 afs_get_server(server); 147 break; 148 } 149 150 server = NULL; 151 } 152 } while (need_seqretry(&net->fs_lock, seq)); 153 154 done_seqretry(&net->fs_lock, seq); 155 156 _leave(" = %p", server); 157 return server; 158 } 159 160 /* 161 * Install a server record in the namespace tree 162 */ 163 static struct afs_server *afs_install_server(struct afs_net *net, 164 struct afs_server *candidate) 165 { 166 const struct afs_addr_list *alist; 167 struct afs_server *server; 168 struct rb_node **pp, *p; 169 int ret = -EEXIST, diff; 170 171 _enter("%p", candidate); 172 173 write_seqlock(&net->fs_lock); 174 175 /* Firstly install the server in the UUID lookup tree */ 176 pp = &net->fs_servers.rb_node; 177 p = NULL; 178 while (*pp) { 179 p = *pp; 180 _debug("- consider %p", p); 181 server = rb_entry(p, struct afs_server, uuid_rb); 182 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t)); 183 if (diff < 0) 184 pp = &(*pp)->rb_left; 185 else if (diff > 0) 186 pp = &(*pp)->rb_right; 187 else 188 goto exists; 189 } 190 191 server = candidate; 192 rb_link_node(&server->uuid_rb, p, pp); 193 rb_insert_color(&server->uuid_rb, 
&net->fs_servers); 194 hlist_add_head_rcu(&server->proc_link, &net->fs_proc); 195 196 write_seqlock(&net->fs_addr_lock); 197 alist = rcu_dereference_protected(server->addresses, 198 lockdep_is_held(&net->fs_addr_lock.lock)); 199 200 /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install 201 * it in the IPv4 and/or IPv6 reverse-map lists. 202 * 203 * TODO: For speed we want to use something other than a flat list 204 * here; even sorting the list in terms of lowest address would help a 205 * bit, but anything we might want to do gets messy and memory 206 * intensive. 207 */ 208 if (alist->nr_ipv4 > 0) 209 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4); 210 if (alist->nr_addrs > alist->nr_ipv4) 211 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6); 212 213 write_sequnlock(&net->fs_addr_lock); 214 ret = 0; 215 216 exists: 217 afs_get_server(server); 218 write_sequnlock(&net->fs_lock); 219 return server; 220 } 221 222 /* 223 * allocate a new server record 224 */ 225 static struct afs_server *afs_alloc_server(struct afs_net *net, 226 const uuid_t *uuid, 227 struct afs_addr_list *alist) 228 { 229 struct afs_server *server; 230 231 _enter(""); 232 233 server = kzalloc(sizeof(struct afs_server), GFP_KERNEL); 234 if (!server) 235 goto enomem; 236 237 atomic_set(&server->usage, 1); 238 RCU_INIT_POINTER(server->addresses, alist); 239 server->addr_version = alist->version; 240 server->uuid = *uuid; 241 server->flags = (1UL << AFS_SERVER_FL_NEW); 242 server->update_at = ktime_get_real_seconds() + afs_server_update_delay; 243 rwlock_init(&server->fs_lock); 244 INIT_LIST_HEAD(&server->cb_interests); 245 rwlock_init(&server->cb_break_lock); 246 247 afs_inc_servers_outstanding(net); 248 _leave(" = %p", server); 249 return server; 250 251 enomem: 252 _leave(" = NULL [nomem]"); 253 return NULL; 254 } 255 256 /* 257 * Look up an address record for a server 258 */ 259 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, 260 
struct key *key, const uuid_t *uuid) 261 { 262 struct afs_addr_cursor ac; 263 struct afs_addr_list *alist; 264 int ret; 265 266 ret = afs_set_vl_cursor(&ac, cell); 267 if (ret < 0) 268 return ERR_PTR(ret); 269 270 while (afs_iterate_addresses(&ac)) { 271 if (test_bit(ac.index, &ac.alist->yfs)) 272 alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid); 273 else 274 alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid); 275 switch (ac.error) { 276 case 0: 277 afs_end_cursor(&ac); 278 return alist; 279 case -ECONNABORTED: 280 ac.error = afs_abort_to_error(ac.abort_code); 281 goto error; 282 case -ENOMEM: 283 case -ENONET: 284 goto error; 285 case -ENETUNREACH: 286 case -EHOSTUNREACH: 287 case -ECONNREFUSED: 288 break; 289 default: 290 ac.error = -EIO; 291 goto error; 292 } 293 } 294 295 error: 296 return ERR_PTR(afs_end_cursor(&ac)); 297 } 298 299 /* 300 * Get or create a fileserver record. 301 */ 302 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, 303 const uuid_t *uuid) 304 { 305 struct afs_addr_list *alist; 306 struct afs_server *server, *candidate; 307 308 _enter("%p,%pU", cell->net, uuid); 309 310 server = afs_find_server_by_uuid(cell->net, uuid); 311 if (server) 312 return server; 313 314 alist = afs_vl_lookup_addrs(cell, key, uuid); 315 if (IS_ERR(alist)) 316 return ERR_CAST(alist); 317 318 candidate = afs_alloc_server(cell->net, uuid, alist); 319 if (!candidate) { 320 afs_put_addrlist(alist); 321 return ERR_PTR(-ENOMEM); 322 } 323 324 server = afs_install_server(cell->net, candidate); 325 if (server != candidate) { 326 afs_put_addrlist(alist); 327 kfree(candidate); 328 } 329 330 _leave(" = %p{%d}", server, atomic_read(&server->usage)); 331 return server; 332 } 333 334 /* 335 * Set the server timer to fire after a given delay, assuming it's not already 336 * set for an earlier time. 
337 */ 338 static void afs_set_server_timer(struct afs_net *net, time64_t delay) 339 { 340 if (net->live) { 341 afs_inc_servers_outstanding(net); 342 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ)) 343 afs_dec_servers_outstanding(net); 344 } 345 } 346 347 /* 348 * Server management timer. We have an increment on fs_outstanding that we 349 * need to pass along to the work item. 350 */ 351 void afs_servers_timer(struct timer_list *timer) 352 { 353 struct afs_net *net = container_of(timer, struct afs_net, fs_timer); 354 355 _enter(""); 356 if (!queue_work(afs_wq, &net->fs_manager)) 357 afs_dec_servers_outstanding(net); 358 } 359 360 /* 361 * Release a reference on a server record. 362 */ 363 void afs_put_server(struct afs_net *net, struct afs_server *server) 364 { 365 unsigned int usage; 366 367 if (!server) 368 return; 369 370 server->put_time = ktime_get_real_seconds(); 371 372 usage = atomic_dec_return(&server->usage); 373 374 _enter("{%u}", usage); 375 376 if (likely(usage > 0)) 377 return; 378 379 afs_set_server_timer(net, afs_server_gc_delay); 380 } 381 382 static void afs_server_rcu(struct rcu_head *rcu) 383 { 384 struct afs_server *server = container_of(rcu, struct afs_server, rcu); 385 386 afs_put_addrlist(rcu_access_pointer(server->addresses)); 387 kfree(server); 388 } 389 390 /* 391 * destroy a dead server 392 */ 393 static void afs_destroy_server(struct afs_net *net, struct afs_server *server) 394 { 395 struct afs_addr_list *alist = rcu_access_pointer(server->addresses); 396 struct afs_addr_cursor ac = { 397 .alist = alist, 398 .addr = &alist->addrs[0], 399 .start = alist->index, 400 .index = alist->index, 401 .error = 0, 402 }; 403 _enter("%p", server); 404 405 afs_fs_give_up_all_callbacks(net, server, &ac, NULL); 406 call_rcu(&server->rcu, afs_server_rcu); 407 afs_dec_servers_outstanding(net); 408 } 409 410 /* 411 * Garbage collect any expired servers. 
412 */ 413 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) 414 { 415 struct afs_server *server; 416 bool deleted; 417 int usage; 418 419 while ((server = gc_list)) { 420 gc_list = server->gc_next; 421 422 write_seqlock(&net->fs_lock); 423 usage = 1; 424 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0); 425 if (deleted) { 426 rb_erase(&server->uuid_rb, &net->fs_servers); 427 hlist_del_rcu(&server->proc_link); 428 } 429 write_sequnlock(&net->fs_lock); 430 431 if (deleted) 432 afs_destroy_server(net, server); 433 } 434 } 435 436 /* 437 * Manage the records of servers known to be within a network namespace. This 438 * includes garbage collecting unused servers. 439 * 440 * Note also that we were given an increment on net->servers_outstanding by 441 * whoever queued us that we need to deal with before returning. 442 */ 443 void afs_manage_servers(struct work_struct *work) 444 { 445 struct afs_net *net = container_of(work, struct afs_net, fs_manager); 446 struct afs_server *gc_list = NULL; 447 struct rb_node *cursor; 448 time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX; 449 bool purging = !net->live; 450 451 _enter(""); 452 453 /* Trawl the server list looking for servers that have expired from 454 * lack of use. 
455 */ 456 read_seqlock_excl(&net->fs_lock); 457 458 for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) { 459 struct afs_server *server = 460 rb_entry(cursor, struct afs_server, uuid_rb); 461 int usage = atomic_read(&server->usage); 462 463 _debug("manage %pU %u", &server->uuid, usage); 464 465 ASSERTCMP(usage, >=, 1); 466 ASSERTIFCMP(purging, usage, ==, 1); 467 468 if (usage == 1) { 469 time64_t expire_at = server->put_time; 470 471 if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) && 472 !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags)) 473 expire_at += afs_server_gc_delay; 474 if (purging || expire_at <= now) { 475 server->gc_next = gc_list; 476 gc_list = server; 477 } else if (expire_at < next_manage) { 478 next_manage = expire_at; 479 } 480 } 481 } 482 483 read_sequnlock_excl(&net->fs_lock); 484 485 /* Update the timer on the way out. We have to pass an increment on 486 * servers_outstanding in the namespace that we are in to the timer or 487 * the work scheduler. 488 */ 489 if (!purging && next_manage < TIME64_MAX) { 490 now = ktime_get_real_seconds(); 491 492 if (next_manage - now <= 0) { 493 if (queue_work(afs_wq, &net->fs_manager)) 494 afs_inc_servers_outstanding(net); 495 } else { 496 afs_set_server_timer(net, next_manage - now); 497 } 498 } 499 500 afs_gc_servers(net, gc_list); 501 502 afs_dec_servers_outstanding(net); 503 _leave(" [%d]", atomic_read(&net->servers_outstanding)); 504 } 505 506 static void afs_queue_server_manager(struct afs_net *net) 507 { 508 afs_inc_servers_outstanding(net); 509 if (!queue_work(afs_wq, &net->fs_manager)) 510 afs_dec_servers_outstanding(net); 511 } 512 513 /* 514 * Purge list of servers. 
*/
void afs_purge_servers(struct afs_net *net)
{
	_enter("");

	/* If the timer was deleted, take back the count it was holding */
	if (del_timer_sync(&net->fs_timer))
		atomic_dec(&net->servers_outstanding);

	afs_queue_server_manager(net);

	/* Wait until nothing is outstanding any more */
	_debug("wait");
	wait_var_event(&net->servers_outstanding,
		       atomic_read(&net->servers_outstanding) == 0);
	_leave("");
}

/*
 * Probe a fileserver to find its capabilities.
 *
 * The address cursor in the operation cursor is reset to start from the
 * address list's current index and each address is then tried in turn with
 * afs_fs_get_capabilities().  On the first success the server is flagged
 * AFS_SERVER_FL_PROBED and true is returned.  Network-unreachable, refused
 * and timeout errors move on to the next address; any other error ends the
 * probe.  False is returned if no address gave a successful probe, with the
 * error left in fc->ac.error.
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_addr_cursor *ac = &fc->ac;
	int err;

	_enter("");

	ac->addr = NULL;
	ac->start = READ_ONCE(ac->alist->index);
	ac->index = ac->start;
	ac->error = 0;
	ac->begun = false;

	while (afs_iterate_addresses(ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					ac, fc->key);
		err = ac->error;

		if (err == 0) {
			afs_end_cursor(ac);
			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
			return true;
		}

		/* These just mean this address is no good; try the next */
		if (err == -ENETUNREACH ||
		    err == -EHOSTUNREACH ||
		    err == -ECONNREFUSED ||
		    err == -ETIMEDOUT ||
		    err == -ETIME)
			continue;

		/* Everything else is fatal to the probe.  An abort is turned
		 * into an error, -ENOMEM and -ENONET are passed through as
		 * they are and anything unrecognised becomes -EIO.
		 */
		if (err == -ECONNABORTED)
			ac->error = afs_abort_to_error(ac->abort_code);
		else if (err != -ENOMEM && err != -ENONET)
			ac->error = -EIO;
		break;
	}

	afs_end_cursor(ac);
	return false;
}

/*
 * If we haven't already, try probing the fileserver to get its capabilities.
 * We try not to instigate parallel probes, but it's possible that the parallel
 * probes will fail due to authentication failure when ours would succeed.
 *
 * TODO: Try sending an anonymous probe if an authenticated probe fails.
583 */ 584 bool afs_probe_fileserver(struct afs_fs_cursor *fc) 585 { 586 bool success; 587 int ret, retries = 0; 588 589 _enter(""); 590 591 retry: 592 if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) { 593 _leave(" = t"); 594 return true; 595 } 596 597 if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) { 598 success = afs_do_probe_fileserver(fc); 599 clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags); 600 wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING); 601 _leave(" = t"); 602 return success; 603 } 604 605 _debug("wait"); 606 ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING, 607 TASK_INTERRUPTIBLE); 608 if (ret == -ERESTARTSYS) { 609 fc->ac.error = ret; 610 _leave(" = f [%d]", ret); 611 return false; 612 } 613 614 retries++; 615 if (retries == 4) { 616 fc->ac.error = -ESTALE; 617 _leave(" = f [stale]"); 618 return false; 619 } 620 _debug("retry"); 621 goto retry; 622 } 623 624 /* 625 * Get an update for a server's address list. 626 */ 627 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server) 628 { 629 struct afs_addr_list *alist, *discard; 630 631 _enter(""); 632 633 alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, 634 &server->uuid); 635 if (IS_ERR(alist)) { 636 fc->ac.error = PTR_ERR(alist); 637 _leave(" = f [%d]", fc->ac.error); 638 return false; 639 } 640 641 discard = alist; 642 if (server->addr_version != alist->version) { 643 write_lock(&server->fs_lock); 644 discard = rcu_dereference_protected(server->addresses, 645 lockdep_is_held(&server->fs_lock)); 646 rcu_assign_pointer(server->addresses, alist); 647 server->addr_version = alist->version; 648 write_unlock(&server->fs_lock); 649 } 650 651 server->update_at = ktime_get_real_seconds() + afs_server_update_delay; 652 afs_put_addrlist(discard); 653 _leave(" = t"); 654 return true; 655 } 656 657 /* 658 * See if a server's address list needs updating. 
659 */ 660 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server) 661 { 662 time64_t now = ktime_get_real_seconds(); 663 long diff; 664 bool success; 665 int ret, retries = 0; 666 667 _enter(""); 668 669 ASSERT(server); 670 671 retry: 672 diff = READ_ONCE(server->update_at) - now; 673 if (diff > 0) { 674 _leave(" = t [not now %ld]", diff); 675 return true; 676 } 677 678 if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) { 679 success = afs_update_server_record(fc, server); 680 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags); 681 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING); 682 _leave(" = %d", success); 683 return success; 684 } 685 686 ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, 687 TASK_INTERRUPTIBLE); 688 if (ret == -ERESTARTSYS) { 689 fc->ac.error = ret; 690 _leave(" = f [intr]"); 691 return false; 692 } 693 694 retries++; 695 if (retries == 4) { 696 _leave(" = f [stale]"); 697 ret = -ESTALE; 698 return false; 699 } 700 goto retry; 701 } 702