xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb2_fsctl_odx.c (revision c585f97b10d318e825698eb51d4671fef5b4d21f)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2018-2021 Tintri by DDN, Inc.  All rights reserved.
14  */
15 
16 /*
17  * Support functions for smb2_ioctl/fsctl codes:
18  * FSCTL_SRV_OFFLOAD_READ
19  * FSCTL_SRV_OFFLOAD_WRITE
20  * (and related)
21  */
22 
23 #include <smbsrv/smb2_kproto.h>
24 #include <smbsrv/smb_fsops.h>
25 #include <smb/winioctl.h>
26 
27 /*
28  * Summary of how offload data transfer works:
29  *
30  * The client drives a server-side copy.  Outline:
31  * 1: open src_file
32  * 2: create dst_file and set its size
33  * 3: while src_file not all copied {
34  *        offload_read(src_file, &token);
35  *        while token not all copied {
36  *	      offload_write(dst_file, token);
37  *        }
38  *    }
39  *
40  * Each "offload read" request returns a "token" representing some
41  * portion of the source file.  The server decides what kind of
42  * token to use, and how much of the source file it should cover.
 * The length represented may be less than the client requested.
44  * No data are copied during offload_read (just meta-data).
45  *
46  * Each "offload write" request copies some portion of the data
47  * represented by the "token" into the output file.  The amount
48  * of data copied may be less than the client requested, and the
49  * client keeps sending offload write requests until they have
50  * copied all the data represented by the current token.
51  */
52 
/*
 * [MS-FSA] OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND_CURRENT_RANGE
 * Returned in the offload read response flags when the filesystem
 * reports no allocated data at or after the requested offset.
 */
#define	OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND	1

/*
 * [MS-FSCC] 2.3.79 STORAGE_OFFLOAD_TOKEN
 * Note reserved: 0xFFFF0002 – 0xFFFFFFFF
 *
 * ...TOKEN_TYPE_ZERO_DATA:  A well-known Token that indicates ...
 * (offload write should just zero to the destination)
 * The payload (tok_other) is ignored with this type.
 */
#define	STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA	0xFFFF0001

/* Our vendor-specific token type: struct tok_native1 */
#define	STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1	0x10001

/*
 * Size of a token on the wire, and the room left for the payload
 * after the 8-byte header (tok_type, tok_reserved, tok_len).
 */
#define	TOKEN_TOTAL_SIZE	512
#define	TOKEN_MAX_PAYLOAD	504	/* 512 - 8 */

/*
 * This mask is for sanity checking offsets etc.
 * Offsets and lengths with any of these bits set are not a
 * multiple of DEV_BSIZE and get rejected.
 */
#define	OFFMASK		((uint64_t)DEV_BSIZE-1)
74 
/*
 * In-memory form of the ODX "token" handed to the client by offload
 * read and presented back to us by offload write.  On the wire it is
 * always TOKEN_TOTAL_SIZE bytes: an 8-byte header followed by up to
 * TOKEN_MAX_PAYLOAD bytes of payload (zero padded).
 *
 * The payload is only interpreted for our own token type
 * (STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1), where it is a struct
 * tok_native1 marshalled field by field by
 * smb_odx_put_token_native1() / smb_odx_get_token_native1().
 */
typedef struct smb_odx_token {
	uint32_t	tok_type;	/* big-endian on the wire */
	uint16_t	tok_reserved;	/* zero */
	uint16_t	tok_len;	/* big-endian on the wire */
	union {
		/* Raw payload view; sizes the union to the max payload. */
		uint8_t u_tok_other[TOKEN_MAX_PAYLOAD];
		/* Payload of a NATIVE1 token: identifies the source range. */
		struct tok_native1 {
			smb2fid_t	tn1_fid;	/* source ofile IDs */
			uint64_t	tn1_off;	/* start offset in source */
			uint64_t	tn1_eof;	/* source size at read time */
			uint32_t	tn1_tid;	/* tree ID of source ofile */
		} u_tok_native1;
	} tok_u;
} smb_odx_token_t;
89 
/*
 * Arguments shared between smb2_fsctl_odx_write() and the per-token
 * type write handlers.  The in_* members are decoded from the
 * FSCTL_OFFLOAD_WRITE_INPUT struct, the out_* members are encoded
 * into the response, and wa_eof is working state.
 */
typedef struct odx_write_args {
	uint32_t in_struct_size;	/* must be TOKEN_TOTAL_SIZE + 32 */
	uint32_t in_flags;
	uint64_t in_dstoff;		/* offset in the destination file */
	uint64_t in_xlen;		/* requested transfer length */
	uint64_t in_xoff;		/* offset relative to the token start */
	uint32_t out_struct_size;	/* always 16 */
	uint32_t out_flags;
	uint64_t out_xlen;		/* length reported as transferred */
	uint64_t wa_eof;		/* destination file size (not rounded) */
} odx_write_args_t;
101 
/* Token marshalling: decode from / encode to an mbuf chain. */
static int smb_odx_get_token(mbuf_chain_t *, smb_odx_token_t *);
static int smb_odx_get_token_native1(mbuf_chain_t *, struct tok_native1 *);
static int smb_odx_put_token(mbuf_chain_t *, smb_odx_token_t *);
static int smb_odx_put_token_native1(mbuf_chain_t *, struct tok_native1 *);

/* Offload write handlers, one per supported token type. */
static uint32_t smb2_fsctl_odx_write_zeros(smb_request_t *, odx_write_args_t *);
static uint32_t smb2_fsctl_odx_write_native1(smb_request_t *,
    odx_write_args_t *, smb_odx_token_t *);
110 
111 
/*
 * We can disable this feature for testing etc.
 * When zero, both offload read and offload write fail with
 * NT_STATUS_NOT_SUPPORTED.
 */
int smb2_odx_enable = 1;

/*
 * These two variables determine the intervals of offload_read and
 * offload_write calls (respectively) during an offload copy.
 *
 * For the offload read token we could offer a token representing
 * the whole file, but we'll have the client come back for a new
 * "token" after each 256M so we have a chance to look for "holes".
 * This lets us use the special "zero" token while we're in any
 * un-allocated parts of the file, so offload_write can use the
 * (more efficient) smb_fsop_freesp instead of copying.
 *
 * We limit the size of offload_write to 16M per request so we
 * don't end up taking so long with I/O that the client might
 * time out the request.  Keep: write_max <= read_max
 */
uint32_t smb2_odx_read_max = (1<<28); /* 256M */
uint32_t smb2_odx_write_max = (1<<24); /* 16M */

/*
 * This buffer size determines the I/O size for the copy during
 * offload write, where it will read/write using this buffer.
 * Note: We kmem_alloc this, so don't make it HUGE.  It only
 * needs to be large enough to allow the copy to proceed with
 * reasonable efficiency.  1M is currently the largest possible
 * block size with ZFS, which would be the natural choice here.
 *
 * However, we limit this to kmem_max_cached (hence 128k rather
 * than 1M), to avoid contention allocating from
 * kmem_oversize_arena.
 */
uint32_t smb2_odx_buf_size = (1<<17); /* 128k */
145 
146 
147 /*
148  * FSCTL_OFFLOAD_READ
149  * [MS-FSCC] 2.3.77
150  *
151  * Similar (in concept) to FSCTL_SRV_REQUEST_RESUME_KEY
152  *
153  * The returned data is an (opaque to the client) 512-byte "token"
154  * that represents the specified range (offset, length) of the
155  * source file.  The "token" we return here comes back to us in an
156  * FSCTL_OFFLOAD_READ.  We must stash whatever we'll need then in
157  * the token we return here.
158  *
159  * We want server-side copy to be able to copy "holes" efficiently,
160  * but would rather avoid the complexity of encoding a list of all
161  * allocated ranges into our returned token, so this compromise:
162  *
163  * When the current range is entirely within a "hole", we'll return
164  * the special "zeros" token, and the offload write using that token
165  * will use the simple and very efficient smb_fsop_freesp.  In this
166  * scenario, we'll have a copy stride of smb2_odx_read_max (256M).
167  *
168  * When there's any data in the range to copy, we'll return our
169  * "native" token, and the subsequent offload_write will walk the
170  * allocated ranges copying and/or zeroing as needed.  In this
171  * scenario, we'll have a copy stride of smb2_odx_write_max (16M).
172  *
173  * One additional optimization allowed by the protocol is that when
174  * we discover that there's no more data after the current range,
175  * we can set the flag ..._ALL_ZERO_BEYOND which tells that client
176  * they can stop copying here if they like.
177  */
178 uint32_t
179 smb2_fsctl_odx_read(smb_request_t *sr, smb_fsctl_t *fsctl)
180 {
181 	smb_attr_t src_attr;
182 	smb_odx_token_t *tok = NULL;
183 	struct tok_native1 *tn1;
184 	smb_ofile_t *ofile = sr->fid_ofile;
185 	uint64_t src_size, src_rnd_size;
186 	off64_t data, hole;
187 	uint32_t in_struct_size;
188 	uint32_t in_flags;
189 	uint32_t in_ttl;
190 	uint64_t in_file_off;
191 	uint64_t in_copy_len;
192 	uint64_t out_xlen;
193 	uint32_t out_struct_size = TOKEN_TOTAL_SIZE + 16;
194 	uint32_t out_flags = 0;
195 	uint32_t status;
196 	uint32_t tok_type;
197 	int rc;
198 
199 	if (smb2_odx_enable == 0)
200 		return (NT_STATUS_NOT_SUPPORTED);
201 
202 	/*
203 	 * Make sure the (src) ofile granted access allows read.
204 	 * [MS-FSA] didn't mention this, so it's not clear where
205 	 * this should happen relative to other checks.  Usually
206 	 * access checks happen early.
207 	 */
208 	status = smb_ofile_access(ofile, ofile->f_cr, FILE_READ_DATA);
209 	if (status != NT_STATUS_SUCCESS)
210 		return (status);
211 
212 	/*
213 	 * Decode FSCTL_OFFLOAD_READ_INPUT struct,
214 	 * and do in/out size checks.
215 	 */
216 	rc = smb_mbc_decodef(
217 	    fsctl->in_mbc, "lll4.qq",
218 	    &in_struct_size,	/* l */
219 	    &in_flags,		/* l */
220 	    &in_ttl,		/* l */
221 	    /* reserved		4. */
222 	    &in_file_off,	/* q */
223 	    &in_copy_len);	/* q */
224 	if (rc != 0)
225 		return (NT_STATUS_BUFFER_TOO_SMALL);
226 	if (fsctl->MaxOutputResp < out_struct_size)
227 		return (NT_STATUS_BUFFER_TOO_SMALL);
228 
229 	/*
230 	 * More arg checking per MS-FSA
231 	 */
232 	if ((in_file_off & OFFMASK) != 0 ||
233 	    (in_copy_len & OFFMASK) != 0)
234 		return (NT_STATUS_INVALID_PARAMETER);
235 	if (in_struct_size != 32)
236 		return (NT_STATUS_INVALID_PARAMETER);
237 	if (in_file_off > INT64_MAX ||
238 	    (in_file_off + in_copy_len) < in_file_off)
239 		return (NT_STATUS_INVALID_PARAMETER);
240 
241 	/*
242 	 * [MS-FSA] (summarizing)
243 	 * If not data stream, or if sparse, encrypted, compressed...
244 	 * return STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED.
245 	 *
246 	 * We'll ignore most of those except to require:
247 	 * Plain file, not a stream.
248 	 */
249 	if (!smb_node_is_file(ofile->f_node))
250 		return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
251 	if (SMB_IS_STREAM(ofile->f_node))
252 		return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
253 
254 	/*
255 	 * [MS-FSA] If Open.Stream.IsDeleted ...
256 	 * We don't really have this.
257 	 */
258 
259 	/*
260 	 * If CopyLength == 0, "return immediately success".
261 	 */
262 	if (in_copy_len == 0) {
263 		out_xlen = 0;
264 		tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
265 		goto done;
266 	}
267 
268 	/*
269 	 * Check for lock conflicting with the read.
270 	 */
271 	status = smb_lock_range_access(sr, ofile->f_node,
272 	    in_file_off, in_copy_len, B_FALSE);
273 	if (status != 0)
274 		return (status); /* == FILE_LOCK_CONFLICT */
275 
276 	/*
277 	 * Get the file size (rounded to a full block)
278 	 * and check the requested offset.
279 	 */
280 	bzero(&src_attr, sizeof (src_attr));
281 	src_attr.sa_mask = SMB_AT_SIZE;
282 	status = smb2_ofile_getattr(sr, ofile, &src_attr);
283 	if (status != NT_STATUS_SUCCESS)
284 		return (status);
285 	src_size = src_attr.sa_vattr.va_size;
286 	if (in_file_off >= src_size)
287 		return (NT_STATUS_END_OF_FILE);
288 
289 	/*
290 	 * Limit the transfer length based on (rounded) EOF.
291 	 * Clients expect ranges of whole disk blocks.
292 	 * If we get a read in this rounded-up range,
293 	 * we'll supply zeros.
294 	 */
295 	src_rnd_size = (src_size + OFFMASK) & ~OFFMASK;
296 	out_xlen = in_copy_len;
297 	if ((in_file_off + out_xlen) > src_rnd_size)
298 		out_xlen = src_rnd_size - in_file_off;
299 
300 	/*
301 	 * Also, have the client come back for a new token after every
302 	 * smb2_odx_read_max bytes, so we'll have opportunities to
303 	 * recognize "holes" in the source file.
304 	 */
305 	if (out_xlen > smb2_odx_read_max)
306 		out_xlen = smb2_odx_read_max;
307 
308 	/*
309 	 * Ask the filesystem if there are any allocated regions in
310 	 * the requested range, and return either the "zeros" token
311 	 * or our "native" token as appropriate (details above).
312 	 */
313 	data = in_file_off;
314 	tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1;
315 	if (sr->sr_state != SMB_REQ_STATE_ACTIVE)
316 		return (NT_STATUS_SUCCESS);
317 	rc = smb_fsop_next_alloc_range(ofile->f_cr, ofile->f_node,
318 	    &data, &hole);
319 	switch (rc) {
320 	case 0:
321 		/* Found some data.  Is it beyond this range? */
322 		if (data >= (in_file_off + out_xlen))
323 			tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
324 		break;
325 	case ENXIO:
326 		/* No data here or following. */
327 		tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
328 		out_flags |= OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND;
329 		break;
330 	case ENOSYS:	/* FS does not support VOP_IOCTL... */
331 	case ENOTTY:	/* ... or _FIO_SEEK_DATA, _HOLE */
332 		break;
333 	default:
334 		cmn_err(CE_NOTE, "smb_fsop_next_alloc_range: rc=%d", rc);
335 		break;
336 	}
337 
338 done:
339 	/* Already checked MaxOutputResp */
340 	(void) smb_mbc_encodef(
341 	    fsctl->out_mbc, "llq",
342 	    out_struct_size,	/* l */
343 	    out_flags,		/* l */
344 	    out_xlen);		/* q */
345 
346 	/*
347 	 * Build the ODX token to return
348 	 */
349 	tok = smb_srm_zalloc(sr, sizeof (*tok));
350 	tok->tok_type = tok_type;
351 	tok->tok_reserved = 0;
352 	if (tok_type == STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1) {
353 		tok->tok_len = sizeof (*tn1);
354 		tn1 = &tok->tok_u.u_tok_native1;
355 		tn1->tn1_fid.persistent = ofile->f_persistid;
356 		tn1->tn1_fid.temporal = ofile->f_fid;
357 		tn1->tn1_off = in_file_off;
358 		tn1->tn1_eof = src_size;
359 		tn1->tn1_tid = sr->smb_tid;
360 	}
361 
362 	rc = smb_odx_put_token(fsctl->out_mbc, tok);
363 	if (rc != 0)
364 		return (NT_STATUS_BUFFER_TOO_SMALL);
365 
366 	return (NT_STATUS_SUCCESS);
367 }
368 
369 /*
370  * FSCTL_SRV_OFFLOAD_WRITE
371  * [MS-FSCC] 2.3.80
372  *
373  * Similar (in concept) to FSCTL_COPYCHUNK_WRITE
374  *
375  * Copies from a source file identified by a "token"
376  * (previously returned by FSCTL_OFFLOAD_READ)
377  * to the file on which the ioctl is issued.
378  */
379 uint32_t
380 smb2_fsctl_odx_write(smb_request_t *sr, smb_fsctl_t *fsctl)
381 {
382 	smb_attr_t dst_attr;
383 	odx_write_args_t args;
384 	smb_odx_token_t *tok = NULL;
385 	smb_ofile_t *ofile = sr->fid_ofile;
386 	uint64_t dst_rnd_size;
387 	uint32_t status = NT_STATUS_INVALID_PARAMETER;
388 	int rc;
389 
390 	bzero(&args, sizeof (args));
391 	args.out_struct_size = 16;
392 
393 	if (smb2_odx_enable == 0)
394 		return (NT_STATUS_NOT_SUPPORTED);
395 
396 	/*
397 	 * Make sure the (dst) ofile granted_access allows write.
398 	 * [MS-FSA] didn't mention this, so it's not clear where
399 	 * this should happen relative to other checks.  Usually
400 	 * access checks happen early.
401 	 */
402 	status = smb_ofile_access(ofile, ofile->f_cr, FILE_WRITE_DATA);
403 	if (status != NT_STATUS_SUCCESS)
404 		return (status);
405 
406 	/*
407 	 * Decode FSCTL_OFFLOAD_WRITE_INPUT struct,
408 	 * and do in/out size checks.
409 	 */
410 	rc = smb_mbc_decodef(
411 	    fsctl->in_mbc, "llqqq",
412 	    &args.in_struct_size,	/* l */
413 	    &args.in_flags,		/* l */
414 	    &args.in_dstoff,		/* q */
415 	    &args.in_xlen,		/* q */
416 	    &args.in_xoff);		/* q */
417 	if (rc != 0)
418 		return (NT_STATUS_BUFFER_TOO_SMALL);
419 	tok = smb_srm_zalloc(sr, sizeof (*tok));
420 	rc = smb_odx_get_token(fsctl->in_mbc, tok);
421 	if (rc != 0)
422 		return (NT_STATUS_BUFFER_TOO_SMALL);
423 	if (fsctl->MaxOutputResp < args.out_struct_size)
424 		return (NT_STATUS_BUFFER_TOO_SMALL);
425 
426 	/*
427 	 * More arg checking per MS-FSA
428 	 */
429 	if ((args.in_dstoff & OFFMASK) != 0 ||
430 	    (args.in_xoff & OFFMASK) != 0 ||
431 	    (args.in_xlen & OFFMASK) != 0)
432 		return (NT_STATUS_INVALID_PARAMETER);
433 	if (args.in_struct_size != (TOKEN_TOTAL_SIZE + 32))
434 		return (NT_STATUS_INVALID_PARAMETER);
435 	if (args.in_dstoff > INT64_MAX ||
436 	    (args.in_dstoff + args.in_xlen) < args.in_dstoff)
437 		return (NT_STATUS_INVALID_PARAMETER);
438 
439 	/*
440 	 * If CopyLength == 0, "return immediately success".
441 	 */
442 	if (args.in_xlen == 0) {
443 		status = 0;
444 		goto done;
445 	}
446 
447 	/*
448 	 * [MS-FSA] (summarizing)
449 	 * If not data stream, or if sparse, encrypted, compressed...
450 	 * return STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED.
451 	 *
452 	 * We'll ignore most of those except to require:
453 	 * Plain file, not a stream.
454 	 */
455 	if (!smb_node_is_file(ofile->f_node))
456 		return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
457 	if (SMB_IS_STREAM(ofile->f_node))
458 		return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
459 
460 	/*
461 	 * [MS-FSA] If Open.Stream.IsDeleted ...
462 	 * We don't really have such a thing.
463 	 * Also skip Volume.MaxFileSize check.
464 	 */
465 
466 	/*
467 	 * Check for lock conflicting with the write.
468 	 */
469 	status = smb_lock_range_access(sr, ofile->f_node,
470 	    args.in_dstoff, args.in_xlen, B_TRUE);
471 	if (status != 0)
472 		return (status); /* == FILE_LOCK_CONFLICT */
473 
474 	/*
475 	 * Need the file size
476 	 */
477 	bzero(&dst_attr, sizeof (dst_attr));
478 	dst_attr.sa_mask = SMB_AT_SIZE;
479 	status = smb2_ofile_getattr(sr, ofile, &dst_attr);
480 	if (status != NT_STATUS_SUCCESS)
481 		return (status);
482 	args.wa_eof = dst_attr.sa_vattr.va_size;
483 	dst_rnd_size = (args.wa_eof + OFFMASK) & ~OFFMASK;
484 
485 	/*
486 	 * Destination offset vs. EOF
487 	 */
488 	if (args.in_dstoff >= args.wa_eof)
489 		return (NT_STATUS_END_OF_FILE);
490 
491 	/*
492 	 * Destination offset+len vs. EOF
493 	 *
494 	 * The spec. is silent about copying when the file length is
495 	 * not block aligned, but clients appear to ask us to copy a
496 	 * range that's rounded up to a block size.  We'll limit the
497 	 * transfer size to the rounded up file size, but the actual
498 	 * copy will stop at EOF (args.wa_eof).
499 	 */
500 	if ((args.in_dstoff + args.in_xlen) > dst_rnd_size)
501 		args.in_xlen = dst_rnd_size - args.in_dstoff;
502 
503 	/*
504 	 * Finally, run the I/O
505 	 */
506 	switch (tok->tok_type) {
507 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
508 		status = smb2_fsctl_odx_write_zeros(sr, &args);
509 		break;
510 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
511 		status = smb2_fsctl_odx_write_native1(sr, &args, tok);
512 		break;
513 	default:
514 		status = NT_STATUS_INVALID_TOKEN;
515 		break;
516 	}
517 
518 done:
519 	/*
520 	 * Checked MaxOutputResp above, so we can ignore errors
521 	 * from mbc_encodef here.
522 	 */
523 	if (status == NT_STATUS_SUCCESS) {
524 		(void) smb_mbc_encodef(
525 		    fsctl->out_mbc, "llq",
526 		    args.out_struct_size,
527 		    args.out_flags,
528 		    args.out_xlen);
529 	}
530 
531 	return (status);
532 }
533 
534 /*
535  * Handle FSCTL_OFFLOAD_WRITE with token type
536  * STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA
537  *
538  * In this handler, the "token" represents a source of zeros.
539  */
540 static uint32_t
541 smb2_fsctl_odx_write_zeros(smb_request_t *sr, odx_write_args_t *args)
542 {
543 	smb_ofile_t *dst_ofile = sr->fid_ofile;
544 	uint64_t xlen = args->in_xlen;
545 	uint32_t status = 0;
546 	int rc;
547 
548 	ASSERT(args->in_xlen > 0);
549 
550 	/*
551 	 * Limit the I/O size.  In here we're just doing freesp,
552 	 * which is assumed to require only meta-data I/O, so
553 	 * we'll allow up to smb2_odx_read_max (256M) per call.
554 	 * This is essentially just a double-check of the range
555 	 * we gave the client at the offload_read call, making
556 	 * sure they can't use a zero token for longer ranges
557 	 * than offload_read would allow.
558 	 */
559 	if (xlen > smb2_odx_read_max)
560 		xlen = smb2_odx_read_max;
561 
562 	/*
563 	 * Also limit to the actual file size, which may be
564 	 * smaller than the (block-aligned) transfer size.
565 	 * Report the rounded up size to the caller at EOF.
566 	 */
567 	args->out_xlen = xlen;
568 	if ((args->in_dstoff + xlen) > args->wa_eof)
569 		xlen = args->wa_eof - args->in_dstoff;
570 
571 	/*
572 	 * Arrange for zeros to appear in the range:
573 	 * in_dstoff, (in_dstoff + in_xlen)
574 	 *
575 	 * Just "free" the range and let it allocate as needed
576 	 * when someone later writes in this range.
577 	 */
578 	rc = smb_fsop_freesp(sr, dst_ofile->f_cr, dst_ofile,
579 	    args->in_dstoff, xlen);
580 	if (rc != 0) {
581 		status = smb_errno2status(rc);
582 		if (status == NT_STATUS_INVALID_PARAMETER ||
583 		    status == NT_STATUS_NOT_SUPPORTED)
584 			status = NT_STATUS_INVALID_DEVICE_REQUEST;
585 		args->out_xlen = 0;
586 	} else {
587 		status = 0;
588 	}
589 
590 	return (status);
591 }
592 
593 /*
594  * Handle FSCTL_OFFLOAD_WRITE with token type
595  * STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1
596  */
597 static uint32_t
598 smb2_fsctl_odx_write_native1(smb_request_t *sr,
599     odx_write_args_t *args, smb_odx_token_t *tok)
600 {
601 	struct tok_native1 *tn1;
602 	smb_ofile_t *dst_ofile = sr->fid_ofile;
603 	smb_ofile_t *src_ofile = NULL;
604 	void *buffer = NULL;
605 	size_t bufsize = smb2_odx_buf_size;
606 	uint64_t src_offset;
607 	uint32_t resid;
608 	uint32_t xlen;
609 	uint32_t status;
610 
611 	/*
612 	 * Lookup the source ofile using the resume key,
613 	 * which smb2_fsctl_offload_read encoded as an
614 	 * smb2fid_t.  Similar to smb2sr_lookup_fid(),
615 	 * but different error code.
616 	 */
617 	tn1 = &tok->tok_u.u_tok_native1;
618 
619 	/*
620 	 * If the source ofile came from another tree, we need to
621 	 * get the other tree and use it for the fid lookup.
622 	 * Do that by temporarily changing sr->tid_tree around
623 	 * the call to smb_ofile_lookup_by_fid().
624 	 */
625 	if (tn1->tn1_tid != sr->smb_tid) {
626 		smb_tree_t *saved_tree;
627 		smb_tree_t *src_tree;
628 
629 		src_tree = smb_session_lookup_tree(sr->session,
630 		    (uint16_t)tn1->tn1_tid);
631 		if (src_tree == NULL) {
632 			status = NT_STATUS_INVALID_TOKEN;
633 			goto out;
634 		}
635 
636 		saved_tree = sr->tid_tree;
637 		sr->tid_tree = src_tree;
638 
639 		src_ofile = smb_ofile_lookup_by_fid(sr,
640 		    (uint16_t)tn1->tn1_fid.temporal);
641 
642 		sr->tid_tree = saved_tree;
643 		smb_tree_release(src_tree);
644 	} else {
645 		src_ofile = smb_ofile_lookup_by_fid(sr,
646 		    (uint16_t)tn1->tn1_fid.temporal);
647 	}
648 
649 	if (src_ofile == NULL ||
650 	    src_ofile->f_persistid != tn1->tn1_fid.persistent) {
651 		status = NT_STATUS_INVALID_TOKEN;
652 		goto out;
653 	}
654 
655 	/*
656 	 * Make sure src_ofile is open on a regular file, and
657 	 * granted access includes READ_DATA
658 	 */
659 	if (!smb_node_is_file(src_ofile->f_node)) {
660 		status = NT_STATUS_ACCESS_DENIED;
661 		goto out;
662 	}
663 	status = smb_ofile_access(src_ofile, src_ofile->f_cr, FILE_READ_DATA);
664 	if (status != NT_STATUS_SUCCESS)
665 		goto out;
666 
667 	/*
668 	 * Limit the I/O size.  In here we're actually copying,
669 	 * so limit to smb2_odx_write_max (16M) per call.
670 	 * Note that xlen is a 32-bit value here.
671 	 */
672 	if (args->in_xlen > smb2_odx_write_max)
673 		xlen = smb2_odx_write_max;
674 	else
675 		xlen = (uint32_t)args->in_xlen;
676 
677 	/*
678 	 * Also limit to the actual file size, which may be
679 	 * smaller than the (block-aligned) transfer size.
680 	 * Report the rounded up size to the caller at EOF.
681 	 */
682 	args->out_xlen = xlen;
683 	if ((args->in_dstoff + xlen) > args->wa_eof)
684 		xlen = (uint32_t)(args->wa_eof - args->in_dstoff);
685 
686 	/*
687 	 * Note: in_xoff is relative to the beginning of the "token"
688 	 * (a range of the source file tn1_off, tn1_eof).  Make sure
689 	 * in_xoff is within the range represented by this token.
690 	 */
691 	src_offset = tn1->tn1_off + args->in_xoff;
692 	if (src_offset >= tn1->tn1_eof ||
693 	    src_offset < tn1->tn1_off) {
694 		status = NT_STATUS_INVALID_PARAMETER;
695 		goto out;
696 	}
697 
698 	/*
699 	 * Get a buffer used for copying, always
700 	 * smb2_odx_buf_size (1M)
701 	 *
702 	 * Rather than sleep for this relatively large allocation,
703 	 * allow the allocation to fail and return an error.
704 	 * The client should then fall back to normal copy.
705 	 */
706 	buffer = kmem_alloc(bufsize, KM_NOSLEEP_LAZY);
707 	if (buffer == NULL) {
708 		status = NT_STATUS_INSUFF_SERVER_RESOURCES;
709 		goto out;
710 	}
711 
712 	/*
713 	 * Copy src to dst for xlen
714 	 */
715 	resid = xlen;
716 	status = smb2_sparse_copy(sr, src_ofile, dst_ofile,
717 	    src_offset, args->in_dstoff, &resid, buffer, bufsize);
718 
719 	/*
720 	 * If the result was a partial copy, round down the
721 	 * reported transfer size to a block boundary.
722 	 */
723 	if (resid != 0) {
724 		xlen -= resid;
725 		xlen &= ~OFFMASK;
726 		args->out_xlen = xlen;
727 	}
728 
729 	/*
730 	 * If we did any I/O, ignore the error that stopped us.
731 	 * We'll report this error during the next call.
732 	 */
733 	if (args->out_xlen > 0)
734 		status = 0;
735 
736 out:
737 	if (src_ofile != NULL)
738 		smb_ofile_release(src_ofile);
739 
740 	if (buffer != NULL)
741 		kmem_free(buffer, bufsize);
742 
743 	return (status);
744 }
745 
746 /*
747  * Get an smb_odx_token_t from the (input) mbuf chain.
748  * Consumes exactly TOKEN_TOTAL_SIZE bytes.
749  */
750 static int
751 smb_odx_get_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
752 {
753 	mbuf_chain_t tok_mbc;
754 	int start_pos = mbc->chain_offset;
755 	int rc;
756 
757 	if (MBC_ROOM_FOR(mbc, TOKEN_TOTAL_SIZE) == 0)
758 		return (-1);
759 
760 	/*
761 	 * No big-endian support in smb_mbc_encodef, so swap
762 	 * the big-endian fields: tok_type (32-bits),
763 	 * (reserved is 16-bit zero, so no swap),
764 	 * and tok_len (16-bits)
765 	 */
766 	rc = smb_mbc_decodef(
767 	    mbc, "l..w",
768 	    &tok->tok_type,
769 	    /* tok_reserved */
770 	    &tok->tok_len);
771 	if (rc != 0)
772 		return (rc);
773 	tok->tok_type = BSWAP_32(tok->tok_type);
774 	tok->tok_len = BSWAP_16(tok->tok_len);
775 
776 	if (tok->tok_len > TOKEN_MAX_PAYLOAD)
777 		return (-1);
778 	rc = MBC_SHADOW_CHAIN(&tok_mbc, mbc,
779 	    mbc->chain_offset, tok->tok_len);
780 	if (rc != 0)
781 		return (rc);
782 
783 	switch (tok->tok_type) {
784 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
785 		/* no payload */
786 		break;
787 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
788 		rc = smb_odx_get_token_native1(&tok_mbc,
789 		    &tok->tok_u.u_tok_native1);
790 		break;
791 	default:
792 		/* caller will error out */
793 		break;
794 	}
795 
796 	if (rc == 0) {
797 		/* Advance past what we shadowed. */
798 		mbc->chain_offset = start_pos + TOKEN_TOTAL_SIZE;
799 	}
800 
801 	return (rc);
802 }
803 
804 static int
805 smb_odx_get_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
806 {
807 	int rc;
808 
809 	rc = smb_mbc_decodef(
810 	    mbc, "qqqql",
811 	    &tn1->tn1_fid.persistent,
812 	    &tn1->tn1_fid.temporal,
813 	    &tn1->tn1_off,
814 	    &tn1->tn1_eof,
815 	    &tn1->tn1_tid);
816 
817 	return (rc);
818 }
819 
820 /*
821  * Put an smb_odx_token_t into the (output) mbuf chain,
822  * padded to TOKEN_TOTAL_SIZE bytes.
823  */
824 static int
825 smb_odx_put_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
826 {
827 	int rc, padlen;
828 	int start_pos = mbc->chain_offset;
829 	int end_pos = start_pos + TOKEN_TOTAL_SIZE;
830 
831 	if (tok->tok_len > TOKEN_MAX_PAYLOAD)
832 		return (-1);
833 
834 	/*
835 	 * No big-endian support in smb_mbc_encodef, so swap
836 	 * the big-endian fields: tok_type (32-bits),
837 	 * (reserved is 16-bit zero, so no swap),
838 	 * and tok_len (16-bits)
839 	 */
840 	rc = smb_mbc_encodef(
841 	    mbc, "lww",
842 	    BSWAP_32(tok->tok_type),
843 	    0, /* tok_reserved */
844 	    BSWAP_16(tok->tok_len));
845 	if (rc != 0)
846 		return (rc);
847 
848 	switch (tok->tok_type) {
849 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
850 		/* no payload */
851 		break;
852 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
853 		rc = smb_odx_put_token_native1(mbc,
854 		    &tok->tok_u.u_tok_native1);
855 		break;
856 	default:
857 		ASSERT(0);
858 		return (-1);
859 	}
860 
861 	/* Pad out to TOKEN_TOTAL_SIZE bytes. */
862 	if (mbc->chain_offset < end_pos) {
863 		padlen = end_pos - mbc->chain_offset;
864 		(void) smb_mbc_encodef(mbc, "#.", padlen);
865 	}
866 	ASSERT(mbc->chain_offset == end_pos);
867 
868 	return (rc);
869 }
870 
871 static int
872 smb_odx_put_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
873 {
874 	int rc;
875 
876 	rc = smb_mbc_encodef(
877 	    mbc, "qqqql",
878 	    tn1->tn1_fid.persistent,
879 	    tn1->tn1_fid.temporal,
880 	    tn1->tn1_off,
881 	    tn1->tn1_eof,
882 	    tn1->tn1_tid);
883 
884 	return (rc);
885 }
886