xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4x_state.c (revision 08855964b9970604433f7b19dcd71cf5af5e5f14)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
26  * Copyright 2017 RackTop Systems.
27  */
28 
29 #include <sys/sdt.h>
30 #include <sys/atomic.h>
31 #include <nfs/nfs4.h>
32 
/*
 * RFS4_TABSIZE is passed to rfs4_table_create() when the session table
 * is set up in rfs4x_state_init_locked().  DEBUG kernels use a much
 * smaller value than production kernels.
 */
#ifdef DEBUG
#define	RFS4_TABSIZE 17
#else
#define	RFS4_TABSIZE 2047
#endif
38 
/*
 * Base value from which the session table's maximum size argument is
 * derived (see rfs4x_state_init_locked()).  The expansion is parenthesized
 * so that the macro behaves as a single value inside any expression,
 * e.g. as the right-hand operand of '/' or '%'.
 */
#define	RFS4_MAXTABSZ (1024 * 1024)
40 
/*
 * Server-side ceilings for the number of slots (ca_maxrequests) a client
 * may ask for; sess_chan_limits() clamps the requested channel attributes
 * to these values.
 */
slotid4 rfs4_max_slots		= MAXSLOTS;		/* fore channel */
slotid4 rfs4_back_max_slots	= MAXSLOTS_BACK;	/* back channel */
43 
/*
 * Implementation view of a session id.
 *
 * The union overlays the server's own layout (impl_id) on the protocol
 * sessionid4 value so that the individual fields can be filled in (see
 * rfs4_session_create()) and so that the hash function can pick out the
 * s_id field.
 */
typedef union {
	/* Both members have the same size */
	struct {
		uint32_t pad0;
		uint32_t pad1;
		uint32_t start_time;	/* NFS server start time */
		uint32_t s_id;		/* unique session index */
	} impl_id;
	sessionid4 id4;
} rfs4_sid;
54 
55 /*
56  * --------------------------------------------------------
57  * MDS - NFSv4.1  Sessions
58  * --------------------------------------------------------
59  */
60 static uint32_t
61 sessid_hash(void *key)
62 {
63 	rfs4_sid *idp = key;
64 
65 	return (idp->impl_id.s_id);
66 }
67 
68 static bool_t
69 sessid_compare(rfs4_entry_t entry, void *key)
70 {
71 	rfs4_session_t	*sp = (rfs4_session_t *)entry;
72 	sessionid4	*idp = (sessionid4 *)key;
73 
74 	return (bcmp(idp, &sp->sn_sessid, sizeof (sessionid4)) == 0);
75 }
76 
77 static void *
78 sessid_mkkey(rfs4_entry_t entry)
79 {
80 	rfs4_session_t *sp = (rfs4_session_t *)entry;
81 
82 	return (&sp->sn_sessid);
83 }
84 
/*
 * Release a session: calls rfs4_dbe_rele() on the session's database
 * entry (sn_dbe).  Counterpart of rfs4x_session_hold().
 */
void
rfs4x_session_rele(rfs4_session_t *sp)
{
	rfs4_dbe_rele(sp->sn_dbe);
}
90 
/*
 * Hold a session: calls rfs4_dbe_hold() on the session's database
 * entry (sn_dbe).  Counterpart of rfs4x_session_rele().
 */
void
rfs4x_session_hold(rfs4_session_t *sp)
{
	rfs4_dbe_hold(sp->sn_dbe);
}
96 
97 rfs4_session_t *
98 rfs4x_findsession_by_id(sessionid4 sessid)
99 {
100 	rfs4_session_t	*sp;
101 	bool_t		 create = FALSE;
102 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
103 
104 	sp = (rfs4_session_t *)rfs4_dbsearch(nsrv4->rfs4_session_idx,
105 	    sessid, &create, NULL, RFS4_DBS_VALID);
106 
107 	return (sp);
108 }
109 
110 /*
111  * A clientid can have multiple sessions associated with it. Hence,
112  * performing a raw 'mds_findsession' (even for a create) might
113  * yield a list of sessions associated with the clientid in question.
 * Call the rfs4_dbsearch() function with a key that cannot be found
115  * and create an association between the session table and both
116  * primary (sessionid) index and secondary (clientid) index for the
117  * newly created session.
118  */
119 
120 rfs4_session_t	*
121 rfs4x_createsession(session41_create_t *ap)
122 {
123 	static uint32_t session_id_counter;
124 
125 	rfs4_session_t	*sp = NULL;
126 	bool_t create = TRUE;
127 	rfs4_sid key = {0, 0, 0, 0};
128 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
129 
130 	/*
131 	 * Use unique counter for s_id and s_id to ensure that
132 	 * created entry will have the same index in dbi_buckets[]
133 	 */
134 	ap->cs_id = key.impl_id.s_id = atomic_inc_32_nv(&session_id_counter);
135 
136 	if ((sp = (rfs4_session_t *)rfs4_dbsearch(nsrv4->rfs4_session_idx,
137 	    &key, &create, (void *)ap, RFS4_DBS_VALID)) == NULL) {
138 		DTRACE_PROBE1(mds__srv__createsession__fail,
139 		    session41_create_t *, ap);
140 	}
141 	return (sp);
142 }
143 
144 /* return success of operation */
145 static bool_t
146 client_insert_session(rfs4_client_t *cp, rfs4_session_t *sp)
147 {
148 	bool_t res = TRUE;
149 
150 	rfs4_dbe_lock(cp->rc_dbe);
151 	if (cp->rc_destroying)
152 		res = FALSE;
153 	else
154 		list_insert_tail(&cp->rc_sessions, sp);
155 	rfs4_dbe_unlock(cp->rc_dbe);
156 
157 	return (res);
158 }
159 
/*
 * Unlink the session from the client's session list, under the client's
 * dbe lock.  Calling this for a session whose list node is not active
 * (i.e. one that has already been unlinked) is a no-op.
 */
static void
client_remove_session(rfs4_client_t *cp, rfs4_session_t *sp)
{
	rfs4_dbe_lock(cp->rc_dbe);
	/* Only unlink if the session is still on a list */
	if (list_link_active(&sp->sn_node))
		list_remove(&cp->rc_sessions, sp);
	rfs4_dbe_unlock(cp->rc_dbe);
}
168 
169 /*
170  * Invalidate the session in the DB (so it can't be found anymore)
171  */
172 nfsstat4
173 rfs4x_destroysession(rfs4_session_t *sp, unsigned useref)
174 {
175 	nfsstat4 status = NFS4_OK;
176 
177 	/*
178 	 * RFC 7862 Section 14.1.3:
179 	 * In hindsight, the  NFSv4.1 specification should have
180 	 * mandated that DESTROY_SESSION either abort or complete
181 	 * all outstanding operations.
182 	 */
183 	rfs4_dbe_lock(sp->sn_dbe);
184 	if (rfs4_dbe_refcnt(sp->sn_dbe) > useref)
185 		status = NFS4ERR_DELAY;
186 	else
187 		rfs4_dbe_invalidate(sp->sn_dbe);
188 	rfs4_dbe_unlock(sp->sn_dbe);
189 
190 	if (status == NFS4_OK)
191 		client_remove_session(sp->sn_clnt, sp);
192 
193 	return (status);
194 }
195 
196 /* Invalidate all client's sessions */
197 void
198 rfs4x_client_session_remove(rfs4_client_t *cp)
199 {
200 	rfs4_session_t *sp;
201 
202 	/*
203 	 * Client is forcibly closing so invalidate all sessions
204 	 * without checking the refcount.
205 	 */
206 	rfs4_dbe_lock(cp->rc_dbe);
207 	while ((sp = list_head(&cp->rc_sessions)) != NULL) {
208 		list_remove(&cp->rc_sessions, sp);
209 
210 		rfs4_dbe_invalidate(sp->sn_dbe);
211 	}
212 	rfs4_dbe_unlock(cp->rc_dbe);
213 }
214 
215 nfsstat4
216 sess_chan_limits(sess_channel_t *scp)
217 {
218 	if (scp->cn_attrs.ca_maxrequests > rfs4_max_slots) {
219 		scp->cn_attrs.ca_maxrequests = rfs4_max_slots;
220 	}
221 
222 	if (scp->cn_back_attrs.ca_maxrequests > rfs4_back_max_slots)
223 		scp->cn_back_attrs.ca_maxrequests = rfs4_back_max_slots;
224 
225 
226 	if (scp->cn_attrs.ca_maxoperations > NFS4_COMPOUND_LIMIT)
227 		scp->cn_attrs.ca_maxoperations = NFS4_COMPOUND_LIMIT;
228 
229 	/*
230 	 * Lower limit should be set to smallest sane COMPOUND. Even
231 	 * though a singleton SEQUENCE op is the very smallest COMPOUND,
232 	 * it's also quite boring. For all practical purposes, the lower
233 	 * limit for creating a sess is limited to:
234 	 *
235 	 *		[SEQUENCE + PUTROOTFH + GETFH]
236 	 *
237 	 *	 Can't limit READ's to a specific threshold, otherwise
238 	 *	 we artificially limit the clients to perform reads of
239 	 *	 AT LEAST that granularity, which is WRONG !!! Same goes
240 	 *	 for READDIR's and GETATTR's.
241 	 */
242 	if (scp->cn_attrs.ca_maxresponsesize < (sizeof (SEQUENCE4res) +
243 	    sizeof (PUTROOTFH4res) + sizeof (GETFH4res)))
244 		return (NFS4ERR_TOOSMALL);
245 	return (NFS4_OK);
246 }
247 
248 /*
249  * NFSv4.1 Slot replay cache
250  */
/*
 * Release the reply cached in a slot: the slot's se_buf is handed to
 * rfs4_compound_free() as a COMPOUND4res.
 */
static void
rfs41_cleanup_slot(rfs4_slot_t *se)
{
	rfs4_compound_free((COMPOUND4res *)&se->se_buf);
}
256 
257 static rfs4_slot_t *
258 slots_alloc(size_t n)
259 {
260 	rfs4_slot_t *p;
261 	int i;
262 
263 	p = kmem_zalloc(sizeof (rfs4_slot_t) * n, KM_SLEEP);
264 	for (i = 0; i < n; i++) {
265 		mutex_init(&p[i].se_lock, NULL, MUTEX_DEFAULT, NULL);
266 	}
267 
268 	return (p);
269 }
270 
271 static void
272 slots_free(rfs4_slot_t *slots, size_t n)
273 {
274 	int i;
275 
276 	for (i = 0; i < n; i++) {
277 		rfs4_slot_t *slot = &slots[i];
278 
279 		mutex_destroy(&slot->se_lock);
280 
281 		if (slot->se_flags & RFS4_SLOT_CACHED) {
282 			rfs41_cleanup_slot(slot);
283 		}
284 	}
285 	kmem_free(slots, sizeof (rfs4_slot_t) * n);
286 }
287 
288 /* Additional functions */
289 
290 /* check csa_flags for OP_CREATE_SESSION */
291 bool_t
292 nfs4x_csa_flags_valid(uint32_t flags)
293 {
294 	if (flags & ~CREATE_SESSION4_FLAG_MASK)
295 		return (FALSE);
296 
297 	return (TRUE);
298 }
299 
300 sess_channel_t *
301 rfs41_create_session_channel(channel_dir_from_server4 dir)
302 {
303 	sess_channel_t   *cp;
304 	sess_bcsd_t	 *bp;
305 
306 	cp = (sess_channel_t *)kmem_zalloc(sizeof (sess_channel_t), KM_SLEEP);
307 	rw_init(&cp->cn_lock, NULL, RW_DEFAULT, NULL);
308 
309 	switch (dir) {
310 	case CDFS4_FORE:
311 		break;
312 
313 	case CDFS4_BOTH:
314 	case CDFS4_BACK:
315 		/* BackChan Specific Data */
316 		bp = (sess_bcsd_t *)kmem_zalloc(sizeof (sess_bcsd_t), KM_SLEEP);
317 		rw_init(&bp->bsd_rwlock, NULL, RW_DEFAULT, NULL);
318 		cp->cn_csd = (sess_bcsd_t *)bp;
319 		break;
320 	}
321 	return (cp);
322 }
323 
324 void
325 rfs41_destroy_session_channel(rfs4_session_t *sp)
326 {
327 	sess_channel_t	*cp;
328 	sess_bcsd_t	*bp;
329 
330 	if (sp->sn_back != NULL) {
331 		/* only one channel for both direction for now */
332 		ASSERT(sp->sn_fore == sp->sn_back);
333 
334 		cp = sp->sn_back;
335 		bp = (sess_bcsd_t *)cp->cn_csd;
336 		rw_destroy(&bp->bsd_rwlock);
337 		kmem_free(bp, sizeof (sess_bcsd_t));
338 	} else {
339 		cp = sp->sn_fore;
340 	}
341 
342 	rw_destroy(&cp->cn_lock);
343 	kmem_free(cp, sizeof (sess_channel_t));
344 
345 	sp->sn_back = NULL;
346 	sp->sn_fore = NULL;
347 }
348 
349 static bool_t
350 rfs4_session_create(rfs4_entry_t u_entry, void *arg)
351 {
352 	rfs4_session_t		*sp = (rfs4_session_t *)u_entry;
353 	session41_create_t	*ap = (session41_create_t *)arg;
354 	sess_channel_t		*ocp = NULL;
355 	rfs4_sid		*sidp;
356 	bool_t			 bdrpc = FALSE;
357 	channel_dir_from_server4 dir;
358 	nfsstat4		 sle;
359 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
360 
361 	ASSERT(sp != NULL);
362 	if (sp == NULL)
363 		return (FALSE);
364 
365 	/*
366 	 * Back pointer/ref to parent data struct (rfs4_client_t)
367 	 */
368 	sp->sn_clnt = (rfs4_client_t *)ap->cs_client;
369 	rfs4_dbe_hold(sp->sn_clnt->rc_dbe);
370 
371 	/*
372 	 * Handcrafting the session id
373 	 */
374 	sidp = (rfs4_sid *)&sp->sn_sessid;
375 	sidp->impl_id.pad0 = 0x00000000;
376 	sidp->impl_id.pad1 = 0xFFFFFFFF;
377 	sidp->impl_id.start_time = nsrv4->rfs4_start_time;
378 	sidp->impl_id.s_id = ap->cs_id;
379 
380 	/*
381 	 * Process csa_flags; note that CREATE_SESSION4_FLAG_CONN_BACK_CHAN
382 	 * is processed below since it affects direction and setup of the
383 	 * backchannel accordingly.
384 	 */
385 	if (!nfs4x_csa_flags_valid(ap->cs_aotw.csa_flags)) {
386 		ap->cs_error = NFS4ERR_INVAL;
387 		goto err;
388 	}
389 
390 	sp->sn_csflags = ap->cs_aotw.csa_flags;
391 	if (ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_PERSIST)
392 		/* Do not support persistent reply cache (yet). */
393 		sp->sn_csflags &= ~CREATE_SESSION4_FLAG_PERSIST;
394 
395 	if (ap->cs_aotw.csa_flags & CREATE_SESSION4_FLAG_CONN_RDMA)
396 		/* No RDMA for now */
397 		sp->sn_csflags &= ~CREATE_SESSION4_FLAG_CONN_RDMA;
398 
399 	/*
400 	 * Initialize some overall sessions values
401 	 */
402 	sp->sn_bc.progno = ap->cs_aotw.csa_cb_program;
403 	sp->sn_laccess = nfs_sys_uptime();
404 	sp->sn_flags = 0;
405 	sp->sn_rcached = 0;
406 
407 	/*
408 	 * Check if client has specified that the FORE channel should
409 	 * also be used for call back traffic (ie. bidir RPC). If so,
410 	 * let's try to accomodate the request.
411 	 */
412 	DTRACE_PROBE1(csa__flags, uint32_t, ap->cs_aotw.csa_flags);
413 
414 	/*
415 	 * Session's channel flags depending on bdrpc
416 	 * TODO: Add backchannel handling, i.e. when bdrpc is TRUE
417 	 */
418 	dir = bdrpc ? (CDFS4_FORE | CDFS4_BACK) : CDFS4_FORE;
419 	ocp = rfs41_create_session_channel(dir);
420 	ocp->cn_dir = dir;
421 	sp->sn_fore = ocp;
422 
423 	/*
424 	 * Check if channel attrs will be flexible enough for future
425 	 * purposes. Channel attribute enforcement is done as part of
426 	 * COMPOUND processing.
427 	 */
428 	ocp->cn_attrs = ap->cs_aotw.csa_fore_chan_attrs;
429 	ocp->cn_back_attrs = ap->cs_aotw.csa_back_chan_attrs;
430 	sle = sess_chan_limits(ocp);
431 	if (sle != NFS4_OK) {
432 		ap->cs_error = sle;
433 		goto err_free_chan;
434 	}
435 
436 	/* will fail if client is going to destroy */
437 	if (!client_insert_session(sp->sn_clnt, sp)) {
438 		ap->cs_error = NFS4ERR_DELAY;
439 		goto err_free_chan;
440 	}
441 
442 	/*
443 	 * No need for locks/synchronization at this time,
444 	 * since we're barely creating the session.
445 	 */
446 	if (bdrpc) {
447 		/* Need to be implemented */
448 		VERIFY(0);
449 	} else {
450 		sp->sn_csflags &= ~CREATE_SESSION4_FLAG_CONN_BACK_CHAN;
451 		sp->sn_back = NULL;
452 	}
453 
454 	/*
455 	 * Now we allocate space for the slrc, initializing each slot's
456 	 * sequenceid and slotid to zero and a (pre)cached result of
457 	 * NFS4ERR_SEQ_MISORDERED. Note that we zero out the entries
458 	 * by virtue of the z-alloc.
459 	 */
460 	sp->sn_slots = slots_alloc(ocp->cn_attrs.ca_maxrequests);
461 
462 	return (TRUE);
463 
464 err_free_chan:
465 	rfs41_destroy_session_channel(sp);
466 err:
467 	rfs4_dbe_rele(sp->sn_clnt->rc_dbe);
468 	return (FALSE);
469 }
470 
/*
 * Table "destroy" callback for session entries.
 *
 * Frees the slot tables first (they are sized from attributes stored in
 * the channel, so this has to happen before the channel is destroyed),
 * then the channel itself, then unlinks the session from its client and
 * releases the client.
 */
static void
rfs4_session_destroy(rfs4_entry_t u_entry)
{
	rfs4_session_t	*sp = (rfs4_session_t *)u_entry;
	sess_bcsd_t	*bsdp;

	/*
	 * Back channel slots, if a callback channel was established and
	 * it carries back channel specific data.
	 */
	if (SN_CB_CHAN_EST(sp) && (bsdp = sp->sn_back->cn_csd) != NULL) {
		slots_free(bsdp->bsd_slots,
		    sp->sn_back->cn_back_attrs.ca_maxrequests);
		bsdp->bsd_slots = NULL;
	}

	/*
	 * Nuke slot replay cache for this session
	 */
	if (sp->sn_slots) {
		slots_free(sp->sn_slots, sp->sn_fore->cn_attrs.ca_maxrequests);
		sp->sn_slots = NULL;
	}

	/*
	 * Remove the fore and back channels.
	 */
	rfs41_destroy_session_channel(sp);

	/* No-op if the session was already taken off the client's list */
	client_remove_session(sp->sn_clnt, sp);

	/*
	 * Presumably pairs with the client hold taken in
	 * rfs4_session_create() -- TODO confirm against rfs4_client_rele().
	 */
	rfs4_client_rele(sp->sn_clnt);
}
500 
501 static bool_t
502 rfs4_session_expiry(rfs4_entry_t u_entry)
503 {
504 	rfs4_session_t *sp = (rfs4_session_t *)u_entry;
505 
506 	if (sp == NULL || rfs4_dbe_is_invalid(sp->sn_dbe))
507 		return (TRUE);
508 
509 	if (rfs4_lease_expired(sp->sn_clnt))
510 		return (TRUE);
511 
512 	return (FALSE);
513 }
514 
/*
 * Set up the NFSv4.1 session state for a server instance.
 *
 * Creates the "Session" table in the server's state database, wired to
 * the rfs4_session_create/destroy/expiry callbacks, and a single index
 * "session_idx" on it that uses the sessid_hash/compare/mkkey callbacks.
 * Both handles are stored in nsrv4.
 *
 * NOTE(review): the "_locked" suffix suggests the caller holds a lock
 * protecting the state database -- confirm against the caller.
 */
void
rfs4x_state_init_locked(nfs4_srv_t *nsrv4)
{
	/*
	 * See rfs4_table_create() for the meaning of the individual
	 * timing/sizing arguments.
	 */
	nsrv4->rfs4_session_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "Session", 5 * rfs4_lease_time, 1, rfs4_session_create,
	    rfs4_session_destroy, rfs4_session_expiry, sizeof (rfs4_session_t),
	    RFS4_TABSIZE, RFS4_MAXTABSZ/8, -1);

	nsrv4->rfs4_session_idx = rfs4_index_create(nsrv4->rfs4_session_tab,
	    "session_idx", sessid_hash, sessid_compare, sessid_mkkey, TRUE);
}
526 
/*
 * Counterpart of rfs4x_state_init_locked().  Intentionally empty: nsrv4
 * is unused because the session table is torn down by the caller along
 * with all other tables.
 */
void
rfs4x_state_fini(nfs4_srv_t *nsrv4)
{
	/* All tables will be destroyed by caller */
}
532