/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/include/linux/sunrpc/svc.h
 *
 * RPC server declarations.
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 */

#ifndef SUNRPC_SVC_H
#define SUNRPC_SVC_H

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/lwq.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/pagevec.h>
#include <linux/kthread.h>

/*
 * RPC service thread pool.
 *
 * Pool of threads and temporary sockets. Generally there is only
 * a single one of these per RPC service, but on NUMA machines those
 * services that can benefit from it (i.e. nfs but not lockd) will
 * have one pool per NUMA node. This optimisation reduces cross-
 * node traffic on multi-node NUMA NFS servers.
 */
struct svc_pool {
	unsigned int		sp_id;		/* pool id; also node id on NUMA */
	unsigned int		sp_nrthreads;	/* # of threads currently running in pool */
	unsigned int		sp_nrthrmin;	/* Min number of threads to run per pool */
	unsigned int		sp_nrthrmax;	/* Max requested number of threads in pool */
	struct lwq		sp_xprts;	/* pending transports */
	struct list_head	sp_all_threads;	/* all server threads */
	struct llist_head	sp_idle_threads; /* idle server threads */

	/* statistics on pool operation */
	struct percpu_counter	sp_messages_arrived;
	struct percpu_counter	sp_sockets_queued;
	struct percpu_counter	sp_threads_woken;

	unsigned long		sp_flags;
} ____cacheline_aligned_in_smp;

/* bits for sp_flags */
enum {
	SP_TASK_PENDING,	/* still work to do even if no xprt is queued */
	SP_NEED_VICTIM,		/* One thread needs to agree to exit */
	SP_VICTIM_REMAINS,	/* One thread needs to actually exit */
	SP_TASK_STARTING,	/* Task has started but not added to idle yet */
};
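/*
 * The sp_* statistics above are plain percpu counters. As an illustrative
 * sketch (not part of this header's API), a pool_stats reporter could fold
 * them up with percpu_counter_sum_positive():
 *
 *	static void example_show_pool(struct seq_file *m, struct svc_pool *pool)
 *	{
 *		seq_printf(m, "%u %llu %llu %llu\n", pool->sp_id,
 *			percpu_counter_sum_positive(&pool->sp_messages_arrived),
 *			percpu_counter_sum_positive(&pool->sp_sockets_queued),
 *			percpu_counter_sum_positive(&pool->sp_threads_woken));
 *	}
 */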
/*
 * RPC service.
 *
 * An RPC service is a ``daemon,'' possibly multithreaded, which
 * receives and processes incoming RPC messages.
 * It has one or more transport sockets associated with it, and maintains
 * a list of idle threads waiting for input.
 *
 * A service may host more than one RPC program; sv_programs points to
 * an array of sv_nprogs of them.
 */
struct svc_serv {
	struct svc_program *	sv_programs;	/* RPC programs */
	struct svc_stat *	sv_stats;	/* RPC statistics */
	spinlock_t		sv_lock;
	unsigned int		sv_nprogs;	/* Number of sv_programs */
	unsigned int		sv_nrthreads;	/* # of running server threads */
	unsigned int		sv_max_payload;	/* datagram payload size */
	unsigned int		sv_max_mesg;	/* max_payload + 1 page for overheads */
	unsigned int		sv_xdrsize;	/* XDR buffer size */
	struct list_head	sv_permsocks;	/* all permanent sockets */
	struct list_head	sv_tempsocks;	/* all temporary sockets */
	int			sv_tmpcnt;	/* count of temporary "valid" sockets */
	struct timer_list	sv_temptimer;	/* timer for aging temporary sockets */

	char *			sv_name;	/* service name */

	unsigned int		sv_nrpools;	/* number of thread pools */
	bool			sv_is_pooled;	/* is this a pooled service? */
	struct svc_pool *	sv_pools;	/* array of thread pools */
	int			(*sv_threadfn)(void *data);

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	struct lwq		sv_cb_list;	/* queue for callback requests
						 * that arrive over the same
						 * connection */
	bool			sv_bc_enabled;	/* service uses backchannel */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
};

/* This is used by pool_stats to find and lock an svc */
struct svc_info {
	struct svc_serv		*serv;
	struct mutex		*mutex;
};

void svc_destroy(struct svc_serv **svcp);

/*
 * Maximum payload size supported by a kernel RPC server.
 * This is used to determine the max number of pages nfsd is
 * willing to return in a single READ operation.
 *
 * These happen to all be powers of 2, which is not strictly
 * necessary but helps enforce the real limitation, which is
 * that they should be multiples of PAGE_SIZE.
 *
 * For UDP transports, a block plus NFS, RPC, and UDP headers
 * has to fit into the IP datagram limit of 64K. The largest
 * feasible number for all known page sizes is probably 48K,
 * but we choose 32K here. This is the same as the historical
 * Linux limit; someone who cares more about NFS/UDP performance
 * can test a larger number.
 *
 * For non-UDP transports we have more freedom. A size of 4MB is
 * chosen to accommodate clients that support larger I/O sizes.
 */
enum {
	RPCSVC_MAXPAYLOAD	= 4 * 1024 * 1024,
	RPCSVC_MAXPAYLOAD_TCP	= RPCSVC_MAXPAYLOAD,
	RPCSVC_MAXPAYLOAD_UDP	= 32 * 1024,
};

extern u32 svc_max_payload(const struct svc_rqst *rqstp);

/*
 * RPC Requests and replies are stored in one or more pages.
 * We maintain an array of pages for each server thread.
 * Requests are copied into these pages as they arrive. Remaining
 * pages are available to write the reply into.
 *
 * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each server thread
 * needs to allocate more to replace those used in sending. To help keep track
 * of these pages we have a receive list where all pages initially live, and a
 * send list where pages are moved to when they are to be part of a reply.
 *
 * We use xdr_buf for holding responses as it fits well with NFS
 * read responses (that have a header, and some data pages, and possibly
 * a tail) and means we can share some client side routines.
 *
 * The xdr_buf.head kvec always points to the first page in the rq_*pages
 * list. The xdr_buf.pages pointer points to the second page on that
 * list. xdr_buf.tail points to the end of the first page.
 * This assumes that the non-page part of an rpc reply will fit
 * in a page - NFSd ensures this. lockd also has no trouble.
 */

/**
 * svc_serv_maxpages - maximum count of pages needed for one RPC message
 * @serv: RPC service context
 *
 * Returns a count of pages or vectors that can hold the maximum
 * size RPC message for @serv.
 *
 * Each request/reply pair can have at most one "payload", plus two
 * pages, one for the request, and one for the reply.
 * nfsd_splice_actor() might need an extra page when a READ payload
 * is not page-aligned.
 */
static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
{
	return DIV_ROUND_UP(serv->sv_max_mesg, PAGE_SIZE) + 2 + 1;
}
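/*
 * Worked example (assuming PAGE_SIZE == 4096): a service whose sv_max_mesg
 * is one megabyte of payload plus one page of overhead (1052672 bytes)
 * needs DIV_ROUND_UP(1052672, 4096) + 2 + 1 = 257 + 3 = 260 pages.
 */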
/*
 * The context of a single thread, including the request currently being
 * processed.
 */
struct svc_rqst {
	struct list_head	rq_all;		/* all threads list */
	struct llist_node	rq_idle;	/* On the idle list */
	struct rcu_head		rq_rcu_head;	/* for RCU deferred kfree */
	struct svc_xprt *	rq_xprt;	/* transport ptr */

	struct sockaddr_storage	rq_addr;	/* peer address */
	size_t			rq_addrlen;
	struct sockaddr_storage	rq_daddr;	/* dest addr of request
						 * - reply from here */
	size_t			rq_daddrlen;

	struct svc_serv *	rq_server;	/* RPC service definition */
	struct svc_pool *	rq_pool;	/* thread pool */
	const struct svc_procedure *rq_procinfo;/* procedure info */
	struct auth_ops *	rq_authop;	/* authentication flavour */
	struct svc_cred		rq_cred;	/* auth info */
	void *			rq_xprt_ctxt;	/* transport specific context ptr */
	struct svc_deferred_req *rq_deferred;	/* deferred request we are replaying */

	struct xdr_buf		rq_arg;
	struct xdr_stream	rq_arg_stream;
	struct xdr_stream	rq_res_stream;
	struct folio		*rq_scratch_folio;
	struct xdr_buf		rq_res;
	unsigned long		rq_maxpages;	/* num of entries in rq_pages */
	struct page *		*rq_pages;
	struct page *		*rq_respages;	/* points into rq_pages */
	struct page *		*rq_next_page;	/* next reply page to use */
	struct page *		*rq_page_end;	/* one past the last page */

	struct folio_batch	rq_fbatch;
	struct bio_vec		*rq_bvec;

	__be32			rq_xid;		/* transmission id */
	u32			rq_prog;	/* program number */
	u32			rq_vers;	/* program version */
	u32			rq_proc;	/* procedure number */
	u32			rq_prot;	/* IP protocol */
	int			rq_cachetype;	/* catering to nfsd */
	unsigned long		rq_flags;	/* flags field */
	ktime_t			rq_qtime;	/* enqueue time */

	void *			rq_argp;	/* decoded arguments */
	void *			rq_resp;	/* xdr'd results */
	__be32			*rq_accept_statp;
	void *			rq_auth_data;	/* flavor-specific data */
	__be32			rq_auth_stat;	/* authentication status */
	int			rq_auth_slack;	/* extra space xdr code
						 * should leave in head
						 * for krb5i, krb5p.
						 */
	int			rq_reserved;	/* space on socket outq
						 * reserved for this request
						 */
	ktime_t			rq_stime;	/* start time */

	struct cache_req	rq_chandle;	/* handle passed to caches for
						 * request delaying
						 */
	/* Catering to nfsd */
	struct auth_domain *	rq_client;	/* RPC peer info */
	struct auth_domain *	rq_gssclient;	/* "gss/"-style peer info */
	struct task_struct	*rq_task;	/* service thread */
	struct net		*rq_bc_net;	/* pointer to backchannel's
						 * net namespace
						 */

	int			rq_err;		/* Thread sets this to indicate
						 * initialisation success.
						 */

	unsigned long		bc_to_initval;
	unsigned int		bc_to_retries;
	unsigned int		rq_status_counter; /* RPC processing counter */
	void			**rq_lease_breaker; /* The v4 client breaking a lease */
};

/* bits for rq_flags */
enum {
	RQ_SECURE,		/* secure port */
	RQ_LOCAL,		/* local request */
	RQ_USEDEFERRAL,		/* use deferral */
	RQ_DROPME,		/* drop current reply */
	RQ_VICTIM,		/* Have agreed to shut down */
	RQ_DATA,		/* request has data */
};

#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)

/*
 * Rigorous type checking on sockaddr type conversions
 */
static inline struct sockaddr_in *svc_addr_in(const struct svc_rqst *rqst)
{
	return (struct sockaddr_in *) &rqst->rq_addr;
}

static inline struct sockaddr_in6 *svc_addr_in6(const struct svc_rqst *rqst)
{
	return (struct sockaddr_in6 *) &rqst->rq_addr;
}

static inline struct sockaddr *svc_addr(const struct svc_rqst *rqst)
{
	return (struct sockaddr *) &rqst->rq_addr;
}

static inline struct sockaddr_in *svc_daddr_in(const struct svc_rqst *rqst)
{
	return (struct sockaddr_in *) &rqst->rq_daddr;
}

static inline struct sockaddr_in6 *svc_daddr_in6(const struct svc_rqst *rqst)
{
	return (struct sockaddr_in6 *) &rqst->rq_daddr;
}

static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst)
{
	return (struct sockaddr *) &rqst->rq_daddr;
}
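/*
 * Illustrative sketch only: these accessors let callers switch on the
 * address family without open-coded casts, e.g. to fetch the peer port:
 *
 *	static unsigned short example_peer_port(const struct svc_rqst *rqstp)
 *	{
 *		switch (svc_addr(rqstp)->sa_family) {
 *		case AF_INET:
 *			return ntohs(svc_addr_in(rqstp)->sin_port);
 *		case AF_INET6:
 *			return ntohs(svc_addr_in6(rqstp)->sin6_port);
 *		}
 *		return 0;
 *	}
 */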
/**
 * svc_thread_should_stop - check if this thread should stop
 * @rqstp: the thread that might need to stop
 *
 * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS
 * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming
 * the victim using this function. It should then promptly call
 * svc_exit_thread() to complete the process, clearing SP_VICTIM_REMAINS
 * so the task waiting for a thread to exit can wake and continue.
 *
 * Return values:
 *   %true: caller should invoke svc_exit_thread()
 *   %false: caller should do nothing
 */
static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
{
	if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags))
		set_bit(RQ_VICTIM, &rqstp->rq_flags);

	return test_bit(RQ_VICTIM, &rqstp->rq_flags);
}

/**
 * svc_thread_init_status - report whether thread has initialised successfully
 * @rqstp: the thread in question
 * @err: errno code
 *
 * After performing any initialisation that could fail, and before starting
 * normal work, each sunrpc svc_thread must call svc_thread_init_status()
 * with an appropriate error, or zero.
 *
 * If zero is passed, the thread is ready and must continue until
 * svc_thread_should_stop() returns true. If a non-zero error is passed
 * the call will not return - the thread will exit.
 */
static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err)
{
	store_release_wake_up(&rqstp->rq_err, err);
	if (err)
		kthread_exit(1);
}
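/*
 * Together the two helpers above define the expected shape of an
 * sv_threadfn. A minimal sketch, modelled on nfsd() and lockd()
 * (svc_recv() comes from the sunrpc transport layer):
 *
 *	static int example_svc_thread(void *data)
 *	{
 *		struct svc_rqst *rqstp = data;
 *
 *		svc_thread_init_status(rqstp, 0);
 *		while (!svc_thread_should_stop(rqstp))
 *			svc_recv(rqstp);
 *		svc_exit_thread(rqstp);
 *		return 0;
 *	}
 */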
struct svc_deferred_req {
	u32			prot;	/* protocol (UDP or TCP) */
	struct svc_xprt		*xprt;
	struct sockaddr_storage	addr;	/* where reply must go */
	size_t			addrlen;
	struct sockaddr_storage	daddr;	/* where reply must come from */
	size_t			daddrlen;
	void			*xprt_ctxt;
	struct cache_deferred_req handle;
	int			argslen;
	__be32			args[];
};

struct svc_process_info {
	union {
		int  (*dispatch)(struct svc_rqst *rqstp);
		struct {
			unsigned int lovers;
			unsigned int hivers;
		} mismatch;
	};
};

/*
 * RPC program - an array of these can use the same transport endpoint
 */
struct svc_program {
	u32			pg_prog;	/* program number */
	unsigned int		pg_lovers;	/* lowest version */
	unsigned int		pg_hivers;	/* highest version */
	unsigned int		pg_nvers;	/* number of versions */
	const struct svc_version **pg_vers;	/* version array */
	char *			pg_name;	/* service name */
	char *			pg_class;	/* class name: services sharing authentication */
	enum svc_auth_status	(*pg_authenticate)(struct svc_rqst *rqstp);
	__be32			(*pg_init_request)(struct svc_rqst *,
						   const struct svc_program *,
						   struct svc_process_info *);
	int			(*pg_rpcbind_set)(struct net *net,
						  const struct svc_program *,
						  u32 version, int family,
						  unsigned short proto,
						  unsigned short port);
};

/*
 * RPC program version
 */
struct svc_version {
	u32			vs_vers;	/* version number */
	u32			vs_nproc;	/* number of procedures */
	const struct svc_procedure *vs_proc;	/* per-procedure info */
	unsigned long __percpu	*vs_count;	/* call counts */
	u32			vs_xdrsize;	/* xdrsize needed for this version */

	/* Don't register with rpcbind */
	bool			vs_hidden;

	/* Don't care if the rpcbind registration fails */
	bool			vs_rpcb_optnl;

	/* Need xprt with congestion control */
	bool			vs_need_cong_ctrl;

	/* Dispatch function */
	int			(*vs_dispatch)(struct svc_rqst *rqstp);
};

/*
 * RPC procedure info
 */
struct svc_procedure {
	/* process the request: */
	__be32			(*pc_func)(struct svc_rqst *);
	/* XDR decode args: */
	bool			(*pc_decode)(struct svc_rqst *rqstp,
					     struct xdr_stream *xdr);
	/* XDR encode result: */
	bool			(*pc_encode)(struct svc_rqst *rqstp,
					     struct xdr_stream *xdr);
	/* XDR free result: */
	void			(*pc_release)(struct svc_rqst *);
	unsigned int		pc_argsize;	/* argument struct size */
	unsigned int		pc_argzero;	/* how much of argument to clear */
	unsigned int		pc_ressize;	/* result struct size */
	unsigned int		pc_cachetype;	/* cache info (NFS) */
	unsigned int		pc_xdrressize;	/* maximum size of XDR reply */
	const char *		pc_name;	/* for display */
};
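/*
 * A hedged sketch of how the three structures above nest; every
 * "example_*" name is invented for illustration (see the nfsd and lockd
 * tables for real instances). pg_vers is indexed by version number:
 *
 *	static const struct svc_procedure example_procedures[] = {
 *		[0] = {
 *			.pc_func	= example_proc_null,
 *			.pc_decode	= example_decode_void,
 *			.pc_encode	= example_encode_void,
 *			.pc_argsize	= sizeof(struct example_void),
 *			.pc_argzero	= sizeof(struct example_void),
 *			.pc_ressize	= sizeof(struct example_void),
 *			.pc_xdrressize	= 1,
 *			.pc_name	= "NULL",
 *		},
 *	};
 *
 *	static const struct svc_version example_version1 = {
 *		.vs_vers	= 1,
 *		.vs_nproc	= ARRAY_SIZE(example_procedures),
 *		.vs_proc	= example_procedures,
 *		.vs_count	= example_count,
 *		.vs_xdrsize	= EXAMPLE_XDRSIZE,
 *	};
 *
 *	static const struct svc_version *example_versions[] = {
 *		[1] = &example_version1,
 *	};
 *
 *	static struct svc_program example_program = {
 *		.pg_prog	= 400999,	(an invented program number)
 *		.pg_nvers	= ARRAY_SIZE(example_versions),
 *		.pg_vers	= example_versions,
 *		.pg_name	= "example",
 *		.pg_class	= "example",
 *		.pg_init_request = svc_generic_init_request,
 *		.pg_rpcbind_set	= svc_generic_rpcbind_set,
 *	};
 */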
/*
 * Function prototypes.
 */
int sunrpc_set_pool_mode(const char *val);
int sunrpc_get_pool_mode(char *val, size_t size);
void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
int svc_bind(struct svc_serv *serv, struct net *net);
struct svc_serv *svc_create(struct svc_program *, unsigned int,
			    int (*threadfn)(void *data));
bool		   svc_rqst_replace_page(struct svc_rqst *rqstp,
					 struct page *page);
void		   svc_rqst_release_pages(struct svc_rqst *rqstp);
int		   svc_new_thread(struct svc_serv *serv, struct svc_pool *pool);
void		   svc_exit_thread(struct svc_rqst *);
struct svc_serv *  svc_create_pooled(struct svc_program *prog,
				     unsigned int nprog,
				     struct svc_stat *stats,
				     unsigned int bufsize,
				     int (*threadfn)(void *data));
int		   svc_set_pool_threads(struct svc_serv *serv, struct svc_pool *pool,
					unsigned int min_threads, unsigned int max_threads);
int		   svc_set_num_threads(struct svc_serv *serv, unsigned int min_threads,
				       unsigned int nrservs);
int		   svc_pool_stats_open(struct svc_info *si, struct file *file);
void		   svc_process(struct svc_rqst *rqstp);
void		   svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp);
int		   svc_register(const struct svc_serv *, struct net *, const int,
				const unsigned short, const unsigned short);

void		   svc_wake_up(struct svc_serv *);
void		   svc_reserve(struct svc_rqst *rqstp, int space);
void		   svc_pool_wake_idle_thread(struct svc_pool *pool);
struct svc_pool   *svc_pool_for_cpu(struct svc_serv *serv);
char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
const char *	   svc_proc_name(const struct svc_rqst *rqstp);
int		   svc_encode_result_payload(struct svc_rqst *rqstp,
					     unsigned int offset,
					     unsigned int length);
char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
					     struct kvec *first, void *p,
					     size_t total);
__be32		   svc_generic_init_request(struct svc_rqst *rqstp,
					    const struct svc_program *progp,
					    struct svc_process_info *procinfo);
int		   svc_generic_rpcbind_set(struct net *net,
					   const struct svc_program *progp,
					   u32 version, int family,
					   unsigned short proto,
					   unsigned short port);

#define	RPC_MAX_ADDRBUFLEN	(63U)
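/*
 * A hedged sketch of service setup using the declarations above, loosely
 * modelled on nfsd: create a pooled service, bind it in a network
 * namespace, then ask for threads. example_program and example_thread
 * are placeholders, and the thread counts are arbitrary.
 *
 *	serv = svc_create_pooled(&example_program, 1, NULL,
 *				 RPCSVC_MAXPAYLOAD_TCP, example_thread);
 *	if (!serv)
 *		return -ENOMEM;
 *	error = svc_bind(serv, net);
 *	if (!error)
 *		error = svc_set_num_threads(serv, 1, nrservs);
 *	if (error)
 *		svc_destroy(&serv);
 */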
/*
 * When we want to reduce the size of the reserved space in the response
 * buffer, we need to take into account the size of any checksum data that
 * may be at the end of the packet. This is difficult to determine exactly
 * for all cases without actually generating the checksum, so we just use a
 * static value.
 */
static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
{
	svc_reserve(rqstp, space + rqstp->rq_auth_slack);
}

/**
 * svcxdr_init_decode - Prepare an xdr_stream for Call decoding
 * @rqstp: controlling server RPC transaction context
 */
static inline void svcxdr_init_decode(struct svc_rqst *rqstp)
{
	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
	struct xdr_buf *buf = &rqstp->rq_arg;
	struct kvec *argv = buf->head;

	WARN_ON(buf->len != buf->head->iov_len + buf->page_len + buf->tail->iov_len);
	buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len;

	xdr_init_decode(xdr, buf, argv->iov_base, NULL);
	xdr_set_scratch_folio(xdr, rqstp->rq_scratch_folio);
}

/**
 * svcxdr_init_encode - Prepare an xdr_stream for svc Reply encoding
 * @rqstp: controlling server RPC transaction context
 */
static inline void svcxdr_init_encode(struct svc_rqst *rqstp)
{
	struct xdr_stream *xdr = &rqstp->rq_res_stream;
	struct xdr_buf *buf = &rqstp->rq_res;
	struct kvec *resv = buf->head;

	xdr_reset_scratch_buffer(xdr);

	xdr->buf = buf;
	xdr->iov = resv;
	xdr->p = resv->iov_base + resv->iov_len;
	xdr->end = resv->iov_base + PAGE_SIZE;
	buf->len = resv->iov_len;
	xdr->page_ptr = buf->pages - 1;
	buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages);
	xdr->rqst = NULL;
}

/**
 * svcxdr_encode_opaque_pages - Insert pages into an xdr_stream
 * @rqstp: controlling server RPC transaction context
 * @xdr: xdr_stream to be updated
 * @pages: array of pages to insert
 * @base: starting offset of first data byte in @pages
 * @len: number of data bytes in @pages to insert
 *
 * After the @pages are added, the tail iovec is instantiated pointing
 * to the end of the head buffer, and the stream is set up to encode
 * subsequent items into the tail.
 */
static inline void svcxdr_encode_opaque_pages(struct svc_rqst *rqstp,
					      struct xdr_stream *xdr,
					      struct page **pages,
					      unsigned int base,
					      unsigned int len)
{
	xdr_write_pages(xdr, pages, base, len);
	xdr->page_ptr = rqstp->rq_next_page - 1;
}

/**
 * svcxdr_set_auth_slack - set the request's security-flavor slack space
 * @rqstp: RPC transaction
 * @slack: buffer space to reserve for the transaction's security flavor
 *
 * Set the request's slack space requirement, and set aside that much
 * space in the rqstp's rq_res.head for use when the auth wraps the Reply.
 */
static inline void svcxdr_set_auth_slack(struct svc_rqst *rqstp, int slack)
{
	struct xdr_stream *xdr = &rqstp->rq_res_stream;
	struct xdr_buf *buf = &rqstp->rq_res;
	struct kvec *resv = buf->head;

	rqstp->rq_auth_slack = slack;

	xdr->end -= XDR_QUADLEN(slack);
	buf->buflen -= rqstp->rq_auth_slack;

	WARN_ON(xdr->iov != resv);
	WARN_ON(xdr->p > xdr->end);
}

/**
 * svcxdr_set_accept_stat - Reserve space for the accept_stat field
 * @rqstp: RPC transaction context
 *
 * Return values:
 *   %true: Success
 *   %false: No response buffer space was available
 */
static inline bool svcxdr_set_accept_stat(struct svc_rqst *rqstp)
{
	struct xdr_stream *xdr = &rqstp->rq_res_stream;

	rqstp->rq_accept_statp = xdr_reserve_space(xdr, XDR_UNIT);
	if (unlikely(!rqstp->rq_accept_statp))
		return false;
	*rqstp->rq_accept_statp = rpc_success;
	return true;
}

#endif /* SUNRPC_SVC_H */