xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb2_fsctl_odx.c (revision 08f2ce59ccfd4e449c92dd87b23e756e439d4daa)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2018-2021 Tintri by DDN, Inc.  All rights reserved.
14  * Copyright 2022 RackTop Systems, Inc.
15  */
16 
17 /*
18  * Support functions for smb2_ioctl/fsctl codes:
19  * FSCTL_SRV_OFFLOAD_READ
20  * FSCTL_SRV_OFFLOAD_WRITE
21  * (and related)
22  */
23 
24 #include <smbsrv/smb2_kproto.h>
25 #include <smbsrv/smb_fsops.h>
26 #include <smb/winioctl.h>
27 
28 /*
29  * Summary of how offload data transfer works:
30  *
31  * The client drives a server-side copy.  Outline:
32  * 1: open src_file
33  * 2: create dst_file and set its size
34  * 3: while src_file not all copied {
35  *        offload_read(src_file, &token);
36  *        while token not all copied {
37  *	      offload_write(dst_file, token);
38  *        }
39  *    }
40  *
41  * Each "offload read" request returns a "token" representing some
42  * portion of the source file.  The server decides what kind of
43  * token to use, and how much of the source file it should cover.
44  * The length represented may be less than the client requested.
45  * No data are copied during offload_read (just meta-data).
46  *
47  * Each "offload write" request copies some portion of the data
48  * represented by the "token" into the output file.  The amount
49  * of data copied may be less than the client requested, and the
50  * client keeps sending offload write requests until they have
51  * copied all the data represented by the current token.
52  */
53 
/*
 * Output flag for offload read:
 * [MS-FSA] OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND_CURRENT_RANGE
 */
#define	OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND	0x1

/*
 * Token types, see [MS-FSCC] 2.3.79 STORAGE_OFFLOAD_TOKEN
 * (0xFFFF0002 - 0xFFFFFFFF are reserved)
 *
 * TOKEN_TYPE_ZERO_DATA is the well-known token meaning that the
 * offload write should just put zeros in the destination.  The
 * payload (tok_other) is ignored with this type.
 *
 * TOKEN_TYPE_NATIVE1 is our vendor-specific type, with a payload
 * of struct tok_native1.
 */
#define	STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA	0xFFFF0001
#define	STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1	0x00010001

/*
 * Size of a token on the wire, and the part of it left for the
 * payload after the 8 bytes of tok_type, tok_reserved, tok_len.
 */
#define	TOKEN_TOTAL_SIZE	512
#define	TOKEN_MAX_PAYLOAD	(TOKEN_TOTAL_SIZE - 8)

/* Low bits that must be clear in block-aligned offsets and lengths. */
#define	OFFMASK		((uint64_t)DEV_BSIZE - 1)
75 
76 typedef struct smb_odx_token {
77 	uint32_t	tok_type;	/* big-endian on the wire */
78 	uint16_t	tok_reserved;	/* zero */
79 	uint16_t	tok_len;	/* big-endian on the wire */
80 	union {
81 		uint8_t u_tok_other[TOKEN_MAX_PAYLOAD];
82 		struct tok_native1 {
83 			smb2fid_t	tn1_fid;
84 			uint64_t	tn1_off;
85 			uint64_t	tn1_eof;
86 			uint32_t	tn1_tid;
87 		} u_tok_native1;
88 	} tok_u;
89 } smb_odx_token_t;
90 
/*
 * Arguments shared by smb2_fsctl_odx_write and its per-token-type
 * handlers.  The in_ members are decoded from the request, the
 * out_ members are what gets encoded in a successful reply.
 */
struct odx_write_args {
	uint32_t in_struct_size;	/* size field from the request */
	uint32_t in_flags;
	uint64_t in_dstoff;		/* destination file offset */
	uint64_t in_xlen;		/* requested transfer length */
	uint64_t in_xoff;		/* offset relative to the token */
	uint32_t out_struct_size;	/* size field for the reply */
	uint32_t out_flags;
	uint64_t out_xlen;		/* transfer length reported back */
	uint64_t wa_eof;		/* destination file size */
};
typedef struct odx_write_args odx_write_args_t;
102 
103 static int smb_odx_get_token(mbuf_chain_t *, smb_odx_token_t *);
104 static int smb_odx_get_token_native1(mbuf_chain_t *, struct tok_native1 *);
105 static int smb_odx_put_token(mbuf_chain_t *, smb_odx_token_t *);
106 static int smb_odx_put_token_native1(mbuf_chain_t *, struct tok_native1 *);
107 
108 static uint32_t smb2_fsctl_odx_write_zeros(smb_request_t *, odx_write_args_t *);
109 static uint32_t smb2_fsctl_odx_write_native1(smb_request_t *,
110     odx_write_args_t *, smb_odx_token_t *);
111 
112 
/*
 * Set this to zero to disable the feature (for testing etc.)
 * Both fsctl handlers then fail with INVALID_DEVICE_REQUEST.
 */
int smb2_odx_enable = 1;

/*
 * These two variables determine the intervals of offload_read and
 * offload_write calls (respectively) during an offload copy.
 *
 * For the offload read token we could offer a token representing
 * the whole file, but we'll have the client come back for a new
 * "token" after each 256M so we have a chance to look for "holes".
 * This lets us use the special "zero" token while we're in any
 * un-allocated parts of the file, so offload_write can use the
 * (more efficient) smb_fsop_freesp instead of copying.
 *
 * We limit the size of offload_write to 16M per request so we
 * don't end up taking so long with I/O that the client might
 * time out the request.  Keep: write_max <= read_max
 */
uint32_t smb2_odx_read_max = 256 * 1024 * 1024;	/* 256M */
uint32_t smb2_odx_write_max = 16 * 1024 * 1024;	/* 16M */

/*
 * This buffer size determines the I/O size for the copy during
 * offload write, where it will read/write using this buffer.
 * Note: We kmem_alloc this, so don't make it HUGE.  It only
 * needs to be large enough to allow the copy to proceed with
 * reasonable efficiency.  1M is currently the largest possible
 * block size with ZFS, which would be the natural choice here.
 *
 * Actually, limit this to kmem_max_cached, to avoid contention
 * allocating from kmem_oversize_arena.
 */
uint32_t smb2_odx_buf_size = 128 * 1024;	/* 128k */
146 
147 
148 /*
149  * FSCTL_OFFLOAD_READ
150  * [MS-FSCC] 2.3.77
151  *
152  * Similar (in concept) to FSCTL_SRV_REQUEST_RESUME_KEY
153  *
154  * The returned data is an (opaque to the client) 512-byte "token"
155  * that represents the specified range (offset, length) of the
156  * source file.  The "token" we return here comes back to us in an
157  * FSCTL_OFFLOAD_READ.  We must stash whatever we'll need then in
158  * the token we return here.
159  *
160  * We want server-side copy to be able to copy "holes" efficiently,
161  * but would rather avoid the complexity of encoding a list of all
162  * allocated ranges into our returned token, so this compromise:
163  *
164  * When the current range is entirely within a "hole", we'll return
165  * the special "zeros" token, and the offload write using that token
166  * will use the simple and very efficient smb_fsop_freesp.  In this
167  * scenario, we'll have a copy stride of smb2_odx_read_max (256M).
168  *
169  * When there's any data in the range to copy, we'll return our
170  * "native" token, and the subsequent offload_write will walk the
171  * allocated ranges copying and/or zeroing as needed.  In this
172  * scenario, we'll have a copy stride of smb2_odx_write_max (16M).
173  *
174  * One additional optimization allowed by the protocol is that when
175  * we discover that there's no more data after the current range,
176  * we can set the flag ..._ALL_ZERO_BEYOND which tells that client
177  * they can stop copying here if they like.
178  */
179 uint32_t
180 smb2_fsctl_odx_read(smb_request_t *sr, smb_fsctl_t *fsctl)
181 {
182 	smb_attr_t src_attr;
183 	smb_odx_token_t *tok = NULL;
184 	struct tok_native1 *tn1;
185 	smb_ofile_t *ofile = sr->fid_ofile;
186 	uint64_t src_size, src_rnd_size;
187 	off64_t data, hole;
188 	uint32_t in_struct_size;
189 	uint32_t in_flags;
190 	uint32_t in_ttl;
191 	uint64_t in_file_off;
192 	uint64_t in_copy_len;
193 	uint64_t out_xlen;
194 	uint32_t out_struct_size = TOKEN_TOTAL_SIZE + 16;
195 	uint32_t out_flags = 0;
196 	uint32_t status;
197 	uint32_t tok_type;
198 	int rc;
199 
200 	if (smb2_odx_enable == 0)
201 		return (NT_STATUS_INVALID_DEVICE_REQUEST);
202 
203 	/*
204 	 * Make sure the (src) ofile granted access allows read.
205 	 * [MS-FSA] didn't mention this, so it's not clear where
206 	 * this should happen relative to other checks.  Usually
207 	 * access checks happen early.
208 	 */
209 	status = smb_ofile_access(ofile, ofile->f_cr, FILE_READ_DATA);
210 	if (status != NT_STATUS_SUCCESS)
211 		return (status);
212 
213 	/*
214 	 * Decode FSCTL_OFFLOAD_READ_INPUT struct,
215 	 * and do in/out size checks.
216 	 */
217 	rc = smb_mbc_decodef(
218 	    fsctl->in_mbc, "lll4.qq",
219 	    &in_struct_size,	/* l */
220 	    &in_flags,		/* l */
221 	    &in_ttl,		/* l */
222 	    /* reserved		4. */
223 	    &in_file_off,	/* q */
224 	    &in_copy_len);	/* q */
225 	if (rc != 0)
226 		return (NT_STATUS_BUFFER_TOO_SMALL);
227 	if (fsctl->MaxOutputResp < out_struct_size)
228 		return (NT_STATUS_BUFFER_TOO_SMALL);
229 
230 	/*
231 	 * More arg checking per MS-FSA
232 	 */
233 	if ((in_file_off & OFFMASK) != 0 ||
234 	    (in_copy_len & OFFMASK) != 0)
235 		return (NT_STATUS_INVALID_PARAMETER);
236 	if (in_struct_size != 32)
237 		return (NT_STATUS_INVALID_PARAMETER);
238 	if (in_file_off > INT64_MAX ||
239 	    (in_file_off + in_copy_len) < in_file_off)
240 		return (NT_STATUS_INVALID_PARAMETER);
241 
242 	/*
243 	 * [MS-FSA] (summarizing)
244 	 * If not data stream, or if sparse, encrypted, compressed...
245 	 * return STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED.
246 	 *
247 	 * We'll ignore most of those except to require:
248 	 * Plain file, not a stream.
249 	 */
250 	if (!smb_node_is_file(ofile->f_node))
251 		return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
252 	if (SMB_IS_STREAM(ofile->f_node))
253 		return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
254 
255 	/*
256 	 * [MS-FSA] If Open.Stream.IsDeleted ...
257 	 * We don't really have this.
258 	 */
259 
260 	/*
261 	 * If CopyLength == 0, "return immediately success".
262 	 */
263 	if (in_copy_len == 0) {
264 		out_xlen = 0;
265 		tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
266 		goto done;
267 	}
268 
269 	/*
270 	 * Check for lock conflicting with the read.
271 	 */
272 	status = smb_lock_range_access(sr, ofile->f_node,
273 	    in_file_off, in_copy_len, B_FALSE);
274 	if (status != 0)
275 		return (status); /* == FILE_LOCK_CONFLICT */
276 
277 	/*
278 	 * Get the file size (rounded to a full block)
279 	 * and check the requested offset.
280 	 */
281 	bzero(&src_attr, sizeof (src_attr));
282 	src_attr.sa_mask = SMB_AT_SIZE;
283 	status = smb2_ofile_getattr(sr, ofile, &src_attr);
284 	if (status != NT_STATUS_SUCCESS)
285 		return (status);
286 	src_size = src_attr.sa_vattr.va_size;
287 	if (in_file_off >= src_size)
288 		return (NT_STATUS_END_OF_FILE);
289 
290 	/*
291 	 * Limit the transfer length based on (rounded) EOF.
292 	 * Clients expect ranges of whole disk blocks.
293 	 * If we get a read in this rounded-up range,
294 	 * we'll supply zeros.
295 	 */
296 	src_rnd_size = (src_size + OFFMASK) & ~OFFMASK;
297 	out_xlen = in_copy_len;
298 	if ((in_file_off + out_xlen) > src_rnd_size)
299 		out_xlen = src_rnd_size - in_file_off;
300 
301 	/*
302 	 * Also, have the client come back for a new token after every
303 	 * smb2_odx_read_max bytes, so we'll have opportunities to
304 	 * recognize "holes" in the source file.
305 	 */
306 	if (out_xlen > smb2_odx_read_max)
307 		out_xlen = smb2_odx_read_max;
308 
309 	/*
310 	 * Ask the filesystem if there are any allocated regions in
311 	 * the requested range, and return either the "zeros" token
312 	 * or our "native" token as appropriate (details above).
313 	 */
314 	data = in_file_off;
315 	tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1;
316 	if (sr->sr_state != SMB_REQ_STATE_ACTIVE)
317 		return (NT_STATUS_SUCCESS);
318 	rc = smb_fsop_next_alloc_range(ofile->f_cr, ofile->f_node,
319 	    &data, &hole);
320 	switch (rc) {
321 	case 0:
322 		/* Found some data.  Is it beyond this range? */
323 		if (data >= (in_file_off + out_xlen))
324 			tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
325 		break;
326 	case ENXIO:
327 		/* No data here or following. */
328 		tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
329 		out_flags |= OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND;
330 		break;
331 	case ENOSYS:	/* FS does not support VOP_IOCTL... */
332 	case ENOTTY:	/* ... or _FIO_SEEK_DATA, _HOLE */
333 		break;
334 	default:
335 		cmn_err(CE_NOTE, "smb_fsop_next_alloc_range: rc=%d", rc);
336 		break;
337 	}
338 
339 done:
340 	/* Already checked MaxOutputResp */
341 	(void) smb_mbc_encodef(
342 	    fsctl->out_mbc, "llq",
343 	    out_struct_size,	/* l */
344 	    out_flags,		/* l */
345 	    out_xlen);		/* q */
346 
347 	/*
348 	 * Build the ODX token to return
349 	 */
350 	tok = smb_srm_zalloc(sr, sizeof (*tok));
351 	tok->tok_type = tok_type;
352 	tok->tok_reserved = 0;
353 	if (tok_type == STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1) {
354 		tok->tok_len = sizeof (*tn1);
355 		tn1 = &tok->tok_u.u_tok_native1;
356 		tn1->tn1_fid.persistent = ofile->f_persistid;
357 		tn1->tn1_fid.temporal = ofile->f_fid;
358 		tn1->tn1_off = in_file_off;
359 		tn1->tn1_eof = src_size;
360 		tn1->tn1_tid = sr->smb_tid;
361 	}
362 
363 	rc = smb_odx_put_token(fsctl->out_mbc, tok);
364 	if (rc != 0)
365 		return (NT_STATUS_BUFFER_TOO_SMALL);
366 
367 	return (NT_STATUS_SUCCESS);
368 }
369 
370 /*
371  * FSCTL_SRV_OFFLOAD_WRITE
372  * [MS-FSCC] 2.3.80
373  *
374  * Similar (in concept) to FSCTL_COPYCHUNK_WRITE
375  *
376  * Copies from a source file identified by a "token"
377  * (previously returned by FSCTL_OFFLOAD_READ)
378  * to the file on which the ioctl is issued.
379  */
380 uint32_t
381 smb2_fsctl_odx_write(smb_request_t *sr, smb_fsctl_t *fsctl)
382 {
383 	smb_attr_t dst_attr;
384 	odx_write_args_t args;
385 	smb_odx_token_t *tok = NULL;
386 	smb_ofile_t *ofile = sr->fid_ofile;
387 	uint64_t dst_rnd_size;
388 	uint32_t status = NT_STATUS_INVALID_PARAMETER;
389 	int rc;
390 
391 	bzero(&args, sizeof (args));
392 	args.out_struct_size = 16;
393 
394 	if (smb2_odx_enable == 0)
395 		return (NT_STATUS_INVALID_DEVICE_REQUEST);
396 
397 	/*
398 	 * Make sure the (dst) ofile granted_access allows write.
399 	 * [MS-FSA] didn't mention this, so it's not clear where
400 	 * this should happen relative to other checks.  Usually
401 	 * access checks happen early.
402 	 */
403 	status = smb_ofile_access(ofile, ofile->f_cr, FILE_WRITE_DATA);
404 	if (status != NT_STATUS_SUCCESS)
405 		return (status);
406 
407 	/*
408 	 * Decode FSCTL_OFFLOAD_WRITE_INPUT struct,
409 	 * and do in/out size checks.
410 	 */
411 	rc = smb_mbc_decodef(
412 	    fsctl->in_mbc, "llqqq",
413 	    &args.in_struct_size,	/* l */
414 	    &args.in_flags,		/* l */
415 	    &args.in_dstoff,		/* q */
416 	    &args.in_xlen,		/* q */
417 	    &args.in_xoff);		/* q */
418 	if (rc != 0)
419 		return (NT_STATUS_BUFFER_TOO_SMALL);
420 	tok = smb_srm_zalloc(sr, sizeof (*tok));
421 	rc = smb_odx_get_token(fsctl->in_mbc, tok);
422 	if (rc != 0)
423 		return (NT_STATUS_BUFFER_TOO_SMALL);
424 	if (fsctl->MaxOutputResp < args.out_struct_size)
425 		return (NT_STATUS_BUFFER_TOO_SMALL);
426 
427 	/*
428 	 * More arg checking per MS-FSA
429 	 */
430 	if ((args.in_dstoff & OFFMASK) != 0 ||
431 	    (args.in_xoff & OFFMASK) != 0 ||
432 	    (args.in_xlen & OFFMASK) != 0)
433 		return (NT_STATUS_INVALID_PARAMETER);
434 	if (args.in_struct_size != (TOKEN_TOTAL_SIZE + 32))
435 		return (NT_STATUS_INVALID_PARAMETER);
436 	if (args.in_dstoff > INT64_MAX ||
437 	    (args.in_dstoff + args.in_xlen) < args.in_dstoff)
438 		return (NT_STATUS_INVALID_PARAMETER);
439 
440 	/*
441 	 * If CopyLength == 0, "return immediately success".
442 	 */
443 	if (args.in_xlen == 0) {
444 		status = 0;
445 		goto done;
446 	}
447 
448 	/*
449 	 * [MS-FSA] (summarizing)
450 	 * If not data stream, or if sparse, encrypted, compressed...
451 	 * return STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED.
452 	 *
453 	 * We'll ignore most of those except to require:
454 	 * Plain file, not a stream.
455 	 */
456 	if (!smb_node_is_file(ofile->f_node))
457 		return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
458 	if (SMB_IS_STREAM(ofile->f_node))
459 		return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
460 
461 	/*
462 	 * [MS-FSA] If Open.Stream.IsDeleted ...
463 	 * We don't really have such a thing.
464 	 * Also skip Volume.MaxFileSize check.
465 	 */
466 
467 	/*
468 	 * Check for lock conflicting with the write.
469 	 */
470 	status = smb_lock_range_access(sr, ofile->f_node,
471 	    args.in_dstoff, args.in_xlen, B_TRUE);
472 	if (status != 0)
473 		return (status); /* == FILE_LOCK_CONFLICT */
474 
475 	/*
476 	 * Need the file size
477 	 */
478 	bzero(&dst_attr, sizeof (dst_attr));
479 	dst_attr.sa_mask = SMB_AT_SIZE;
480 	status = smb2_ofile_getattr(sr, ofile, &dst_attr);
481 	if (status != NT_STATUS_SUCCESS)
482 		return (status);
483 	args.wa_eof = dst_attr.sa_vattr.va_size;
484 	dst_rnd_size = (args.wa_eof + OFFMASK) & ~OFFMASK;
485 
486 	/*
487 	 * Destination offset vs. EOF
488 	 */
489 	if (args.in_dstoff >= args.wa_eof)
490 		return (NT_STATUS_END_OF_FILE);
491 
492 	/*
493 	 * Destination offset+len vs. EOF
494 	 *
495 	 * The spec. is silent about copying when the file length is
496 	 * not block aligned, but clients appear to ask us to copy a
497 	 * range that's rounded up to a block size.  We'll limit the
498 	 * transfer size to the rounded up file size, but the actual
499 	 * copy will stop at EOF (args.wa_eof).
500 	 */
501 	if ((args.in_dstoff + args.in_xlen) > dst_rnd_size)
502 		args.in_xlen = dst_rnd_size - args.in_dstoff;
503 
504 	/*
505 	 * Finally, run the I/O
506 	 */
507 	switch (tok->tok_type) {
508 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
509 		status = smb2_fsctl_odx_write_zeros(sr, &args);
510 		break;
511 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
512 		status = smb2_fsctl_odx_write_native1(sr, &args, tok);
513 		break;
514 	default:
515 		status = NT_STATUS_INVALID_TOKEN;
516 		break;
517 	}
518 
519 done:
520 	/*
521 	 * Checked MaxOutputResp above, so we can ignore errors
522 	 * from mbc_encodef here.
523 	 */
524 	if (status == NT_STATUS_SUCCESS) {
525 		(void) smb_mbc_encodef(
526 		    fsctl->out_mbc, "llq",
527 		    args.out_struct_size,
528 		    args.out_flags,
529 		    args.out_xlen);
530 	}
531 
532 	return (status);
533 }
534 
535 /*
536  * Handle FSCTL_OFFLOAD_WRITE with token type
537  * STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA
538  *
539  * In this handler, the "token" represents a source of zeros.
540  */
541 static uint32_t
542 smb2_fsctl_odx_write_zeros(smb_request_t *sr, odx_write_args_t *args)
543 {
544 	smb_ofile_t *dst_ofile = sr->fid_ofile;
545 	uint64_t xlen = args->in_xlen;
546 	uint32_t status = 0;
547 	int rc;
548 
549 	ASSERT(args->in_xlen > 0);
550 
551 	/*
552 	 * Limit the I/O size.  In here we're just doing freesp,
553 	 * which is assumed to require only meta-data I/O, so
554 	 * we'll allow up to smb2_odx_read_max (256M) per call.
555 	 * This is essentially just a double-check of the range
556 	 * we gave the client at the offload_read call, making
557 	 * sure they can't use a zero token for longer ranges
558 	 * than offload_read would allow.
559 	 */
560 	if (xlen > smb2_odx_read_max)
561 		xlen = smb2_odx_read_max;
562 
563 	/*
564 	 * Also limit to the actual file size, which may be
565 	 * smaller than the (block-aligned) transfer size.
566 	 * Report the rounded up size to the caller at EOF.
567 	 */
568 	args->out_xlen = xlen;
569 	if ((args->in_dstoff + xlen) > args->wa_eof)
570 		xlen = args->wa_eof - args->in_dstoff;
571 
572 	/*
573 	 * Arrange for zeros to appear in the range:
574 	 * in_dstoff, (in_dstoff + in_xlen)
575 	 *
576 	 * Just "free" the range and let it allocate as needed
577 	 * when someone later writes in this range.
578 	 */
579 	rc = smb_fsop_freesp(sr, dst_ofile->f_cr, dst_ofile,
580 	    args->in_dstoff, xlen);
581 	if (rc != 0) {
582 		status = smb_errno2status(rc);
583 		if (status == NT_STATUS_INVALID_PARAMETER ||
584 		    status == NT_STATUS_NOT_SUPPORTED)
585 			status = NT_STATUS_INVALID_DEVICE_REQUEST;
586 		args->out_xlen = 0;
587 	} else {
588 		status = 0;
589 	}
590 
591 	return (status);
592 }
593 
594 /*
595  * Handle FSCTL_OFFLOAD_WRITE with token type
596  * STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1
597  */
598 static uint32_t
599 smb2_fsctl_odx_write_native1(smb_request_t *sr,
600     odx_write_args_t *args, smb_odx_token_t *tok)
601 {
602 	struct tok_native1 *tn1;
603 	smb_ofile_t *dst_ofile = sr->fid_ofile;
604 	smb_ofile_t *src_ofile = NULL;
605 	void *buffer = NULL;
606 	size_t bufsize = smb2_odx_buf_size;
607 	uint64_t src_offset;
608 	uint32_t resid;
609 	uint32_t xlen;
610 	uint32_t status;
611 
612 	/*
613 	 * Lookup the source ofile using the resume key,
614 	 * which smb2_fsctl_offload_read encoded as an
615 	 * smb2fid_t.  Similar to smb2sr_lookup_fid(),
616 	 * but different error code.
617 	 */
618 	tn1 = &tok->tok_u.u_tok_native1;
619 
620 	/*
621 	 * If the source ofile came from another tree, we need to
622 	 * get the other tree and use it for the fid lookup.
623 	 * Do that by temporarily changing sr->tid_tree around
624 	 * the call to smb_ofile_lookup_by_fid().
625 	 */
626 	if (tn1->tn1_tid != sr->smb_tid) {
627 		smb_tree_t *saved_tree;
628 		smb_tree_t *src_tree;
629 
630 		src_tree = smb_session_lookup_tree(sr->session,
631 		    (uint16_t)tn1->tn1_tid);
632 		if (src_tree == NULL) {
633 			status = NT_STATUS_INVALID_TOKEN;
634 			goto out;
635 		}
636 
637 		saved_tree = sr->tid_tree;
638 		sr->tid_tree = src_tree;
639 
640 		src_ofile = smb_ofile_lookup_by_fid(sr,
641 		    (uint16_t)tn1->tn1_fid.temporal);
642 
643 		sr->tid_tree = saved_tree;
644 		smb_tree_release(src_tree);
645 	} else {
646 		src_ofile = smb_ofile_lookup_by_fid(sr,
647 		    (uint16_t)tn1->tn1_fid.temporal);
648 	}
649 
650 	if (src_ofile == NULL ||
651 	    src_ofile->f_persistid != tn1->tn1_fid.persistent) {
652 		status = NT_STATUS_INVALID_TOKEN;
653 		goto out;
654 	}
655 
656 	/*
657 	 * Make sure src_ofile is open on a regular file, and
658 	 * granted access includes READ_DATA
659 	 */
660 	if (!smb_node_is_file(src_ofile->f_node)) {
661 		status = NT_STATUS_ACCESS_DENIED;
662 		goto out;
663 	}
664 	status = smb_ofile_access(src_ofile, src_ofile->f_cr, FILE_READ_DATA);
665 	if (status != NT_STATUS_SUCCESS)
666 		goto out;
667 
668 	/*
669 	 * Limit the I/O size.  In here we're actually copying,
670 	 * so limit to smb2_odx_write_max (16M) per call.
671 	 * Note that xlen is a 32-bit value here.
672 	 */
673 	if (args->in_xlen > smb2_odx_write_max)
674 		xlen = smb2_odx_write_max;
675 	else
676 		xlen = (uint32_t)args->in_xlen;
677 
678 	/*
679 	 * Also limit to the actual file size, which may be
680 	 * smaller than the (block-aligned) transfer size.
681 	 * Report the rounded up size to the caller at EOF.
682 	 */
683 	args->out_xlen = xlen;
684 	if ((args->in_dstoff + xlen) > args->wa_eof)
685 		xlen = (uint32_t)(args->wa_eof - args->in_dstoff);
686 
687 	/*
688 	 * Note: in_xoff is relative to the beginning of the "token"
689 	 * (a range of the source file tn1_off, tn1_eof).  Make sure
690 	 * in_xoff is within the range represented by this token.
691 	 */
692 	src_offset = tn1->tn1_off + args->in_xoff;
693 	if (src_offset >= tn1->tn1_eof ||
694 	    src_offset < tn1->tn1_off) {
695 		status = NT_STATUS_INVALID_PARAMETER;
696 		goto out;
697 	}
698 
699 	/*
700 	 * Get a buffer used for copying, always
701 	 * smb2_odx_buf_size (1M)
702 	 *
703 	 * Rather than sleep for this relatively large allocation,
704 	 * allow the allocation to fail and return an error.
705 	 * The client should then fall back to normal copy.
706 	 */
707 	buffer = kmem_alloc(bufsize, KM_NOSLEEP_LAZY);
708 	if (buffer == NULL) {
709 		status = NT_STATUS_INSUFF_SERVER_RESOURCES;
710 		goto out;
711 	}
712 
713 	/*
714 	 * Copy src to dst for xlen
715 	 */
716 	resid = xlen;
717 	status = smb2_sparse_copy(sr, src_ofile, dst_ofile,
718 	    src_offset, args->in_dstoff, &resid, buffer, bufsize);
719 
720 	/*
721 	 * If the result was a partial copy, round down the
722 	 * reported transfer size to a block boundary.
723 	 */
724 	if (resid != 0) {
725 		xlen -= resid;
726 		xlen &= ~OFFMASK;
727 		args->out_xlen = xlen;
728 	}
729 
730 	/*
731 	 * If we did any I/O, ignore the error that stopped us.
732 	 * We'll report this error during the next call.
733 	 */
734 	if (args->out_xlen > 0)
735 		status = 0;
736 
737 out:
738 	if (src_ofile != NULL)
739 		smb_ofile_release(src_ofile);
740 
741 	if (buffer != NULL)
742 		kmem_free(buffer, bufsize);
743 
744 	return (status);
745 }
746 
747 /*
748  * Get an smb_odx_token_t from the (input) mbuf chain.
749  * Consumes exactly TOKEN_TOTAL_SIZE bytes.
750  */
751 static int
752 smb_odx_get_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
753 {
754 	mbuf_chain_t tok_mbc;
755 	int start_pos = mbc->chain_offset;
756 	int rc;
757 
758 	if (MBC_ROOM_FOR(mbc, TOKEN_TOTAL_SIZE) == 0)
759 		return (-1);
760 
761 	/*
762 	 * No big-endian support in smb_mbc_encodef, so swap
763 	 * the big-endian fields: tok_type (32-bits),
764 	 * (reserved is 16-bit zero, so no swap),
765 	 * and tok_len (16-bits)
766 	 */
767 	rc = smb_mbc_decodef(
768 	    mbc, "l..w",
769 	    &tok->tok_type,
770 	    /* tok_reserved */
771 	    &tok->tok_len);
772 	if (rc != 0)
773 		return (rc);
774 	tok->tok_type = BSWAP_32(tok->tok_type);
775 	tok->tok_len = BSWAP_16(tok->tok_len);
776 
777 	if (tok->tok_len > TOKEN_MAX_PAYLOAD)
778 		return (-1);
779 	rc = MBC_SHADOW_CHAIN(&tok_mbc, mbc,
780 	    mbc->chain_offset, tok->tok_len);
781 	if (rc != 0)
782 		return (rc);
783 
784 	switch (tok->tok_type) {
785 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
786 		/* no payload */
787 		break;
788 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
789 		rc = smb_odx_get_token_native1(&tok_mbc,
790 		    &tok->tok_u.u_tok_native1);
791 		break;
792 	default:
793 		/* caller will error out */
794 		break;
795 	}
796 
797 	if (rc == 0) {
798 		/* Advance past what we shadowed. */
799 		mbc->chain_offset = start_pos + TOKEN_TOTAL_SIZE;
800 	}
801 
802 	return (rc);
803 }
804 
805 static int
806 smb_odx_get_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
807 {
808 	int rc;
809 
810 	rc = smb_mbc_decodef(
811 	    mbc, "qqqql",
812 	    &tn1->tn1_fid.persistent,
813 	    &tn1->tn1_fid.temporal,
814 	    &tn1->tn1_off,
815 	    &tn1->tn1_eof,
816 	    &tn1->tn1_tid);
817 
818 	return (rc);
819 }
820 
821 /*
822  * Put an smb_odx_token_t into the (output) mbuf chain,
823  * padded to TOKEN_TOTAL_SIZE bytes.
824  */
825 static int
826 smb_odx_put_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
827 {
828 	int rc, padlen;
829 	int start_pos = mbc->chain_offset;
830 	int end_pos = start_pos + TOKEN_TOTAL_SIZE;
831 
832 	if (tok->tok_len > TOKEN_MAX_PAYLOAD)
833 		return (-1);
834 
835 	/*
836 	 * No big-endian support in smb_mbc_encodef, so swap
837 	 * the big-endian fields: tok_type (32-bits),
838 	 * (reserved is 16-bit zero, so no swap),
839 	 * and tok_len (16-bits)
840 	 */
841 	rc = smb_mbc_encodef(
842 	    mbc, "lww",
843 	    BSWAP_32(tok->tok_type),
844 	    0, /* tok_reserved */
845 	    BSWAP_16(tok->tok_len));
846 	if (rc != 0)
847 		return (rc);
848 
849 	switch (tok->tok_type) {
850 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
851 		/* no payload */
852 		break;
853 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
854 		rc = smb_odx_put_token_native1(mbc,
855 		    &tok->tok_u.u_tok_native1);
856 		break;
857 	default:
858 		ASSERT(0);
859 		return (-1);
860 	}
861 
862 	/* Pad out to TOKEN_TOTAL_SIZE bytes. */
863 	if (mbc->chain_offset < end_pos) {
864 		padlen = end_pos - mbc->chain_offset;
865 		(void) smb_mbc_encodef(mbc, "#.", padlen);
866 	}
867 	ASSERT(mbc->chain_offset == end_pos);
868 
869 	return (rc);
870 }
871 
872 static int
873 smb_odx_put_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
874 {
875 	int rc;
876 
877 	rc = smb_mbc_encodef(
878 	    mbc, "qqqql",
879 	    tn1->tn1_fid.persistent,
880 	    tn1->tn1_fid.temporal,
881 	    tn1->tn1_off,
882 	    tn1->tn1_eof,
883 	    tn1->tn1_tid);
884 
885 	return (rc);
886 }
887