xref: /illumos-gate/usr/src/uts/common/fs/smbsrv/smb2_fsctl_odx.c (revision 35a075c30369bda7caecc8d23aaabe61768b4440)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
14  */
15 
16 /*
17  * Support functions for smb2_ioctl/fsctl codes:
18  * FSCTL_SRV_OFFLOAD_READ
19  * FSCTL_SRV_OFFLOAD_WRITE
20  * (and related)
21  */
22 
23 #include <smbsrv/smb2_kproto.h>
24 #include <smbsrv/smb_fsops.h>
25 #include <smb/winioctl.h>
26 
27 /*
28  * Summary of how offload data transfer works:
29  *
30  * The client drives a server-side copy.  Outline:
31  * 1: open src_file
32  * 2: create dst_file and set its size
33  * 3: while src_file not all copied {
34  *        offload_read(src_file, &token);
35  *        while token not all copied {
36  *	      offload_write(dst_file, token);
37  *        }
38  *    }
39  *
40  * Each "offload read" request returns a "token" representing some
41  * portion of the source file.  The server decides what kind of
42  * token to use, and how much of the source file it should cover.
 * The length represented may be less than the client requested.
44  * No data are copied during offload_read (just meta-data).
45  *
46  * Each "offload write" request copies some portion of the data
47  * represented by the "token" into the output file.  The amount
48  * of data copied may be less than the client requested, and the
49  * client keeps sending offload write requests until they have
50  * copied all the data represented by the current token.
51  */
52 
/* [MS-FSA] OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND_CURRENT_RANGE */
#define	OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND	0x1

/*
 * [MS-FSCC] 2.3.79 STORAGE_OFFLOAD_TOKEN
 * Note that token types 0xFFFF0002 - 0xFFFFFFFF are reserved.
 *
 * ...TOKEN_TYPE_ZERO_DATA:  A well-known Token that indicates ...
 * (offload write should just zero the destination range)
 * The payload (tok_other) is ignored with this type.
 */
#define	STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA	0xFFFF0001

/* Our vendor-specific token type; its payload is struct tok_native1 */
#define	STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1	0x10001

/*
 * Size of a token on the wire, and the room left for the payload
 * after the 8-byte header (type, reserved, length).
 */
#define	TOKEN_TOTAL_SIZE	512
#define	TOKEN_MAX_PAYLOAD	(TOKEN_TOTAL_SIZE - 8)

/* Low-order bits that must be clear in block-aligned offsets/lengths. */
#define	OFFMASK		(((uint64_t)DEV_BSIZE) - 1)
74 
/*
 * In-memory form of the ODX token.  On the wire a token always occupies
 * TOKEN_TOTAL_SIZE bytes: an 8-byte header (type, reserved, length)
 * followed by up to TOKEN_MAX_PAYLOAD bytes of type-specific payload.
 * See smb_odx_get_token / smb_odx_put_token for the marshalling.
 */
typedef struct smb_odx_token {
	uint32_t	tok_type;	/* big-endian on the wire */
	uint16_t	tok_reserved;	/* zero */
	uint16_t	tok_len;	/* big-endian on the wire */
	union {
		/* Raw payload view; sizes the union to the max payload. */
		uint8_t u_tok_other[TOKEN_MAX_PAYLOAD];
		/* Payload for STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1 */
		struct tok_native1 {
			smb2fid_t	tn1_fid; /* source ofile IDs */
			uint64_t	tn1_off; /* source offset of the token */
			uint64_t	tn1_eof; /* source size at offload read */
		} u_tok_native1;
	} tok_u;
} smb_odx_token_t;
88 
/*
 * Arguments shared between smb2_fsctl_odx_write and the per-token-type
 * write handlers.  The in_* members are decoded from the request
 * (FSCTL_OFFLOAD_WRITE_INPUT), the out_* members are encoded into the
 * reply, and wa_eof is the destination file size found by the caller.
 */
typedef struct odx_write_args {
	uint32_t in_struct_size;
	uint32_t in_flags;
	uint64_t in_dstoff;	/* offset in the destination file */
	uint64_t in_xlen;	/* requested transfer length */
	uint64_t in_xoff;	/* offset relative to start of the token */
	uint32_t out_struct_size;
	uint32_t out_flags;
	uint64_t out_xlen;	/* transfer length reported to the client */
	uint64_t wa_eof;	/* destination file size (not rounded) */
} odx_write_args_t;
100 
/* Token marshalling helpers (wire format <-> smb_odx_token_t). */
static int smb_odx_get_token(mbuf_chain_t *, smb_odx_token_t *);
static int smb_odx_get_token_native1(mbuf_chain_t *, struct tok_native1 *);
static int smb_odx_put_token(mbuf_chain_t *, smb_odx_token_t *);
static int smb_odx_put_token_native1(mbuf_chain_t *, struct tok_native1 *);

/* Offload write handlers, one per supported token type. */
static uint32_t smb2_fsctl_odx_write_zeros(smb_request_t *, odx_write_args_t *);
static uint32_t smb2_fsctl_odx_write_native1(smb_request_t *,
    odx_write_args_t *, smb_odx_token_t *);
109 
110 
/*
 * We can disable this feature for testing etc.  When this is zero,
 * both smb2_fsctl_odx_read and smb2_fsctl_odx_write return
 * NT_STATUS_NOT_SUPPORTED before doing anything else.
 */
int smb2_odx_enable = 1;
113 
/*
 * These two variables determine the intervals of offload_read and
 * offload_write calls (respectively) during an offload copy.
 *
 * For the offload read token we could offer a token representing
 * the whole file, but we'll have the client come back for a new
 * "token" after each 256M so we have a chance to look for "holes".
 * This lets us use the special "zero" token while we're in any
 * un-allocated parts of the file, so offload_write can use the
 * (more efficient) smb_fsop_freesp instead of copying.
 *
 * We limit the size of offload_write to 16M per request so we
 * don't end up taking so long with I/O that the client might
 * time out the request.  Keep: write_max <= read_max
 *
 * Note that smb2_odx_read_max is also the per-request limit applied
 * to offload writes that use the "zero" token.
 */
uint32_t smb2_odx_read_max = (1<<28); /* 256M */
uint32_t smb2_odx_write_max = (1<<24); /* 16M */
131 
/*
 * This buffer size determines the I/O size for the copy during
 * offload write, where it will read/write using this buffer.
 * Note: We kmem_alloc this, so don't make it HUGE.  It only
 * needs to be large enough to allow the copy to proceed with
 * reasonable efficiency.  1M is currently the largest possible
 * block size with ZFS, so that's what we'll use here.
 */
uint32_t smb2_odx_buf_size = (1<<20); /* 1M */
141 
142 
143 /*
144  * FSCTL_OFFLOAD_READ
145  * [MS-FSCC] 2.3.77
146  *
147  * Similar (in concept) to FSCTL_SRV_REQUEST_RESUME_KEY
148  *
149  * The returned data is an (opaque to the client) 512-byte "token"
150  * that represents the specified range (offset, length) of the
151  * source file.  The "token" we return here comes back to us in an
152  * FSCTL_OFFLOAD_READ.  We must stash whatever we'll need then in
153  * the token we return here.
154  *
155  * We want server-side copy to be able to copy "holes" efficiently,
156  * but would rather avoid the complexity of encoding a list of all
157  * allocated ranges into our returned token, so this compromise:
158  *
159  * When the current range is entirely within a "hole", we'll return
160  * the special "zeros" token, and the offload write using that token
161  * will use the simple and very efficient smb_fsop_freesp.  In this
162  * scenario, we'll have a copy stride of smb2_odx_read_max (256M).
163  *
164  * When there's any data in the range to copy, we'll return our
165  * "native" token, and the subsequent offload_write will walk the
166  * allocated ranges copying and/or zeroing as needed.  In this
167  * scenario, we'll have a copy stride of smb2_odx_write_max (16M).
168  *
169  * One additional optimization allowed by the protocol is that when
170  * we discover that there's no more data after the current range,
171  * we can set the flag ..._ALL_ZERO_BEYOND which tells that client
172  * they can stop copying here if they like.
173  */
174 uint32_t
175 smb2_fsctl_odx_read(smb_request_t *sr, smb_fsctl_t *fsctl)
176 {
177 	smb_attr_t src_attr;
178 	smb_odx_token_t *tok = NULL;
179 	struct tok_native1 *tn1;
180 	smb_ofile_t *ofile = sr->fid_ofile;
181 	uint64_t src_size, src_rnd_size;
182 	off64_t data, hole;
183 	uint32_t in_struct_size;
184 	uint32_t in_flags;
185 	uint32_t in_ttl;
186 	uint64_t in_file_off;
187 	uint64_t in_copy_len;
188 	uint64_t out_xlen;
189 	uint32_t out_struct_size = TOKEN_TOTAL_SIZE + 16;
190 	uint32_t out_flags = 0;
191 	uint32_t status;
192 	uint32_t tok_type;
193 	int rc;
194 
195 	if (smb2_odx_enable == 0)
196 		return (NT_STATUS_NOT_SUPPORTED);
197 
198 	/*
199 	 * Make sure the (src) ofile granted access allows read.
200 	 * [MS-FSA] didn't mention this, so it's not clear where
201 	 * this should happen relative to other checks.  Usually
202 	 * access checks happen early.
203 	 */
204 	status = smb_ofile_access(ofile, ofile->f_cr, FILE_READ_DATA);
205 	if (status != NT_STATUS_SUCCESS)
206 		return (status);
207 
208 	/*
209 	 * Decode FSCTL_OFFLOAD_READ_INPUT struct,
210 	 * and do in/out size checks.
211 	 */
212 	rc = smb_mbc_decodef(
213 	    fsctl->in_mbc, "lll4.qq",
214 	    &in_struct_size,	/* l */
215 	    &in_flags,		/* l */
216 	    &in_ttl,		/* l */
217 	    /* reserved		4. */
218 	    &in_file_off,	/* q */
219 	    &in_copy_len);	/* q */
220 	if (rc != 0)
221 		return (NT_STATUS_BUFFER_TOO_SMALL);
222 	if (fsctl->MaxOutputResp < out_struct_size)
223 		return (NT_STATUS_BUFFER_TOO_SMALL);
224 
225 	/*
226 	 * More arg checking per MS-FSA
227 	 */
228 	if ((in_file_off & OFFMASK) != 0 ||
229 	    (in_copy_len & OFFMASK) != 0)
230 		return (NT_STATUS_INVALID_PARAMETER);
231 	if (in_struct_size != 32)
232 		return (NT_STATUS_INVALID_PARAMETER);
233 	if (in_file_off > INT64_MAX ||
234 	    (in_file_off + in_copy_len) < in_file_off)
235 		return (NT_STATUS_INVALID_PARAMETER);
236 
237 	/*
238 	 * [MS-FSA] (summarizing)
239 	 * If not data stream, or if sparse, encrypted, compressed...
240 	 * return STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED.
241 	 *
242 	 * We'll ignore most of those except to require:
243 	 * Plain file, not a stream.
244 	 */
245 	if (!smb_node_is_file(ofile->f_node))
246 		return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
247 	if (SMB_IS_STREAM(ofile->f_node))
248 		return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
249 
250 	/*
251 	 * [MS-FSA] If Open.Stream.IsDeleted ...
252 	 * We don't really have this.
253 	 */
254 
255 	/*
256 	 * If CopyLength == 0, "return immediately success".
257 	 */
258 	if (in_copy_len == 0) {
259 		out_xlen = 0;
260 		tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
261 		goto done;
262 	}
263 
264 	/*
265 	 * Check for lock conflicting with the read.
266 	 */
267 	status = smb_lock_range_access(sr, ofile->f_node,
268 	    in_file_off, in_copy_len, B_FALSE);
269 	if (status != 0)
270 		return (status); /* == FILE_LOCK_CONFLICT */
271 
272 	/*
273 	 * Get the file size (rounded to a full block)
274 	 * and check the requested offset.
275 	 */
276 	bzero(&src_attr, sizeof (src_attr));
277 	src_attr.sa_mask = SMB_AT_SIZE;
278 	status = smb2_ofile_getattr(sr, ofile, &src_attr);
279 	if (status != NT_STATUS_SUCCESS)
280 		return (status);
281 	src_size = src_attr.sa_vattr.va_size;
282 	if (in_file_off >= src_size)
283 		return (NT_STATUS_END_OF_FILE);
284 
285 	/*
286 	 * Limit the transfer length based on (rounded) EOF.
287 	 * Clients expect ranges of whole disk blocks.
288 	 * If we get a read in this rounded-up range,
289 	 * we'll supply zeros.
290 	 */
291 	src_rnd_size = (src_size + OFFMASK) & ~OFFMASK;
292 	out_xlen = in_copy_len;
293 	if ((in_file_off + out_xlen) > src_rnd_size)
294 		out_xlen = src_rnd_size - in_file_off;
295 
296 	/*
297 	 * Also, have the client come back for a new token after every
298 	 * smb2_odx_read_max bytes, so we'll have opportunities to
299 	 * recognize "holes" in the source file.
300 	 */
301 	if (out_xlen > smb2_odx_read_max)
302 		out_xlen = smb2_odx_read_max;
303 
304 	/*
305 	 * Ask the filesystem if there are any allocated regions in
306 	 * the requested range, and return either the "zeros" token
307 	 * or our "native" token as appropriate (details above).
308 	 */
309 	data = in_file_off;
310 	tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1;
311 	if (sr->sr_state != SMB_REQ_STATE_ACTIVE)
312 		return (NT_STATUS_SUCCESS);
313 	rc = smb_fsop_next_alloc_range(ofile->f_cr, ofile->f_node,
314 	    &data, &hole);
315 	switch (rc) {
316 	case 0:
317 		/* Found some data.  Is it beyond this range? */
318 		if (data >= (in_file_off + out_xlen))
319 			tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
320 		break;
321 	case ENXIO:
322 		/* No data here or following. */
323 		tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
324 		out_flags |= OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND;
325 		break;
326 	case ENOSYS:	/* FS does not support VOP_IOCTL... */
327 	case ENOTTY:	/* ... or _FIO_SEEK_DATA, _HOLE */
328 		break;
329 	default:
330 		cmn_err(CE_NOTE, "smb_fsop_next_alloc_range: rc=%d", rc);
331 		break;
332 	}
333 
334 done:
335 	/* Already checked MaxOutputResp */
336 	(void) smb_mbc_encodef(
337 	    fsctl->out_mbc, "llq",
338 	    out_struct_size,	/* l */
339 	    out_flags,		/* l */
340 	    out_xlen);		/* q */
341 
342 	/*
343 	 * Build the ODX token to return
344 	 */
345 	tok = smb_srm_zalloc(sr, sizeof (*tok));
346 	tok->tok_type = tok_type;
347 	tok->tok_reserved = 0;
348 	if (tok_type == STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1) {
349 		tok->tok_len = sizeof (*tn1);
350 		tn1 = &tok->tok_u.u_tok_native1;
351 		tn1->tn1_fid.persistent = ofile->f_persistid;
352 		tn1->tn1_fid.temporal = ofile->f_fid;
353 		tn1->tn1_off = in_file_off;
354 		tn1->tn1_eof = src_size;
355 	}
356 
357 	rc = smb_odx_put_token(fsctl->out_mbc, tok);
358 	if (rc != 0)
359 		return (NT_STATUS_BUFFER_TOO_SMALL);
360 
361 	return (NT_STATUS_SUCCESS);
362 }
363 
364 /*
365  * FSCTL_SRV_OFFLOAD_WRITE
366  * [MS-FSCC] 2.3.80
367  *
368  * Similar (in concept) to FSCTL_COPYCHUNK_WRITE
369  *
370  * Copies from a source file identified by a "token"
371  * (previously returned by FSCTL_OFFLOAD_READ)
372  * to the file on which the ioctl is issued.
373  */
374 uint32_t
375 smb2_fsctl_odx_write(smb_request_t *sr, smb_fsctl_t *fsctl)
376 {
377 	smb_attr_t dst_attr;
378 	odx_write_args_t args;
379 	smb_odx_token_t *tok = NULL;
380 	smb_ofile_t *ofile = sr->fid_ofile;
381 	uint64_t dst_rnd_size;
382 	uint32_t status = NT_STATUS_INVALID_PARAMETER;
383 	int rc;
384 
385 	bzero(&args, sizeof (args));
386 	args.out_struct_size = 16;
387 
388 	if (smb2_odx_enable == 0)
389 		return (NT_STATUS_NOT_SUPPORTED);
390 
391 	/*
392 	 * Make sure the (dst) ofile granted_access allows write.
393 	 * [MS-FSA] didn't mention this, so it's not clear where
394 	 * this should happen relative to other checks.  Usually
395 	 * access checks happen early.
396 	 */
397 	status = smb_ofile_access(ofile, ofile->f_cr, FILE_WRITE_DATA);
398 	if (status != NT_STATUS_SUCCESS)
399 		return (status);
400 
401 	/*
402 	 * Decode FSCTL_OFFLOAD_WRITE_INPUT struct,
403 	 * and do in/out size checks.
404 	 */
405 	rc = smb_mbc_decodef(
406 	    fsctl->in_mbc, "llqqq",
407 	    &args.in_struct_size,	/* l */
408 	    &args.in_flags,		/* l */
409 	    &args.in_dstoff,		/* q */
410 	    &args.in_xlen,		/* q */
411 	    &args.in_xoff);		/* q */
412 	if (rc != 0)
413 		return (NT_STATUS_BUFFER_TOO_SMALL);
414 	tok = smb_srm_zalloc(sr, sizeof (*tok));
415 	rc = smb_odx_get_token(fsctl->in_mbc, tok);
416 	if (rc != 0)
417 		return (NT_STATUS_BUFFER_TOO_SMALL);
418 	if (fsctl->MaxOutputResp < args.out_struct_size)
419 		return (NT_STATUS_BUFFER_TOO_SMALL);
420 
421 	/*
422 	 * More arg checking per MS-FSA
423 	 */
424 	if ((args.in_dstoff & OFFMASK) != 0 ||
425 	    (args.in_xoff & OFFMASK) != 0 ||
426 	    (args.in_xlen & OFFMASK) != 0)
427 		return (NT_STATUS_INVALID_PARAMETER);
428 	if (args.in_struct_size != (TOKEN_TOTAL_SIZE + 32))
429 		return (NT_STATUS_INVALID_PARAMETER);
430 	if (args.in_dstoff > INT64_MAX ||
431 	    (args.in_dstoff + args.in_xlen) < args.in_dstoff)
432 		return (NT_STATUS_INVALID_PARAMETER);
433 
434 	/*
435 	 * If CopyLength == 0, "return immediately success".
436 	 */
437 	if (args.in_xlen == 0) {
438 		status = 0;
439 		goto done;
440 	}
441 
442 	/*
443 	 * [MS-FSA] (summarizing)
444 	 * If not data stream, or if sparse, encrypted, compressed...
445 	 * return STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED.
446 	 *
447 	 * We'll ignore most of those except to require:
448 	 * Plain file, not a stream.
449 	 */
450 	if (!smb_node_is_file(ofile->f_node))
451 		return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
452 	if (SMB_IS_STREAM(ofile->f_node))
453 		return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
454 
455 	/*
456 	 * [MS-FSA] If Open.Stream.IsDeleted ...
457 	 * We don't really have such a thing.
458 	 * Also skip Volume.MaxFileSize check.
459 	 */
460 
461 	/*
462 	 * Check for lock conflicting with the write.
463 	 */
464 	status = smb_lock_range_access(sr, ofile->f_node,
465 	    args.in_dstoff, args.in_xlen, B_TRUE);
466 	if (status != 0)
467 		return (status); /* == FILE_LOCK_CONFLICT */
468 
469 	/*
470 	 * Need the file size
471 	 */
472 	bzero(&dst_attr, sizeof (dst_attr));
473 	dst_attr.sa_mask = SMB_AT_SIZE;
474 	status = smb2_ofile_getattr(sr, ofile, &dst_attr);
475 	if (status != NT_STATUS_SUCCESS)
476 		return (status);
477 	args.wa_eof = dst_attr.sa_vattr.va_size;
478 	dst_rnd_size = (args.wa_eof + OFFMASK) & ~OFFMASK;
479 
480 	/*
481 	 * Destination offset vs. EOF
482 	 */
483 	if (args.in_dstoff >= args.wa_eof)
484 		return (NT_STATUS_END_OF_FILE);
485 
486 	/*
487 	 * Destination offset+len vs. EOF
488 	 *
489 	 * The spec. is silent about copying when the file length is
490 	 * not block aligned, but clients appear to ask us to copy a
491 	 * range that's rounded up to a block size.  We'll limit the
492 	 * transfer size to the rounded up file size, but the actual
493 	 * copy will stop at EOF (args.wa_eof).
494 	 */
495 	if ((args.in_dstoff + args.in_xlen) > dst_rnd_size)
496 		args.in_xlen = dst_rnd_size - args.in_dstoff;
497 
498 	/*
499 	 * Finally, run the I/O
500 	 */
501 	switch (tok->tok_type) {
502 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
503 		status = smb2_fsctl_odx_write_zeros(sr, &args);
504 		break;
505 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
506 		status = smb2_fsctl_odx_write_native1(sr, &args, tok);
507 		break;
508 	default:
509 		status = NT_STATUS_INVALID_TOKEN;
510 		break;
511 	}
512 
513 done:
514 	/*
515 	 * Checked MaxOutputResp above, so we can ignore errors
516 	 * from mbc_encodef here.
517 	 */
518 	if (status == NT_STATUS_SUCCESS) {
519 		(void) smb_mbc_encodef(
520 		    fsctl->out_mbc, "llq",
521 		    args.out_struct_size,
522 		    args.out_flags,
523 		    args.out_xlen);
524 	}
525 
526 	return (status);
527 }
528 
529 /*
530  * Handle FSCTL_OFFLOAD_WRITE with token type
531  * STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA
532  *
533  * In this handler, the "token" represents a source of zeros.
534  */
535 static uint32_t
536 smb2_fsctl_odx_write_zeros(smb_request_t *sr, odx_write_args_t *args)
537 {
538 	smb_ofile_t *dst_ofile = sr->fid_ofile;
539 	uint64_t xlen = args->in_xlen;
540 	uint32_t status = 0;
541 	int rc;
542 
543 	ASSERT(args->in_xlen > 0);
544 
545 	/*
546 	 * Limit the I/O size.  In here we're just doing freesp,
547 	 * which is assumed to require only meta-data I/O, so
548 	 * we'll allow up to smb2_odx_read_max (256M) per call.
549 	 * This is essentially just a double-check of the range
550 	 * we gave the client at the offload_read call, making
551 	 * sure they can't use a zero token for longer ranges
552 	 * than offload_read would allow.
553 	 */
554 	if (xlen > smb2_odx_read_max)
555 		xlen = smb2_odx_read_max;
556 
557 	/*
558 	 * Also limit to the actual file size, which may be
559 	 * smaller than the (block-aligned) transfer size.
560 	 * Report the rounded up size to the caller at EOF.
561 	 */
562 	args->out_xlen = xlen;
563 	if ((args->in_dstoff + xlen) > args->wa_eof)
564 		xlen = args->wa_eof - args->in_dstoff;
565 
566 	/*
567 	 * Arrange for zeros to appear in the range:
568 	 * in_dstoff, (in_dstoff + in_xlen)
569 	 *
570 	 * Just "free" the range and let it allocate as needed
571 	 * when someone later writes in this range.
572 	 */
573 	rc = smb_fsop_freesp(sr, dst_ofile->f_cr, dst_ofile,
574 	    args->in_dstoff, xlen);
575 	if (rc != 0) {
576 		status = smb_errno2status(rc);
577 		if (status == NT_STATUS_INVALID_PARAMETER ||
578 		    status == NT_STATUS_NOT_SUPPORTED)
579 			status = NT_STATUS_INVALID_DEVICE_REQUEST;
580 		args->out_xlen = 0;
581 	} else {
582 		status = 0;
583 	}
584 
585 	return (status);
586 }
587 
588 /*
589  * Handle FSCTL_OFFLOAD_WRITE with token type
590  * STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1
591  */
592 static uint32_t
593 smb2_fsctl_odx_write_native1(smb_request_t *sr,
594     odx_write_args_t *args, smb_odx_token_t *tok)
595 {
596 	struct tok_native1 *tn1;
597 	smb_ofile_t *dst_ofile = sr->fid_ofile;
598 	smb_ofile_t *src_ofile = NULL;
599 	void *buffer = NULL;
600 	size_t bufsize = smb2_odx_buf_size;
601 	uint64_t src_offset;
602 	uint32_t resid;
603 	uint32_t xlen;
604 	uint32_t status;
605 
606 	/*
607 	 * Lookup the source ofile using the resume key,
608 	 * which smb2_fsctl_offload_read encoded as an
609 	 * smb2fid_t.  Similar to smb2sr_lookup_fid(),
610 	 * but different error code.
611 	 */
612 	tn1 = &tok->tok_u.u_tok_native1;
613 	src_ofile = smb_ofile_lookup_by_fid(sr,
614 	    (uint16_t)tn1->tn1_fid.temporal);
615 	if (src_ofile == NULL ||
616 	    src_ofile->f_persistid != tn1->tn1_fid.persistent) {
617 		status = NT_STATUS_INVALID_TOKEN;
618 		goto out;
619 	}
620 
621 	/*
622 	 * Make sure src_ofile is open on a regular file, and
623 	 * granted access includes READ_DATA
624 	 */
625 	if (!smb_node_is_file(src_ofile->f_node)) {
626 		status = NT_STATUS_ACCESS_DENIED;
627 		goto out;
628 	}
629 	status = smb_ofile_access(src_ofile, src_ofile->f_cr, FILE_READ_DATA);
630 	if (status != NT_STATUS_SUCCESS)
631 		goto out;
632 
633 	/*
634 	 * Limit the I/O size.  In here we're actually copying,
635 	 * so limit to smb2_odx_write_max (16M) per call.
636 	 * Note that xlen is a 32-bit value here.
637 	 */
638 	if (args->in_xlen > smb2_odx_write_max)
639 		xlen = smb2_odx_write_max;
640 	else
641 		xlen = (uint32_t)args->in_xlen;
642 
643 	/*
644 	 * Also limit to the actual file size, which may be
645 	 * smaller than the (block-aligned) transfer size.
646 	 * Report the rounded up size to the caller at EOF.
647 	 */
648 	args->out_xlen = xlen;
649 	if ((args->in_dstoff + xlen) > args->wa_eof)
650 		xlen = (uint32_t)(args->wa_eof - args->in_dstoff);
651 
652 	/*
653 	 * Note: in_xoff is relative to the beginning of the "token"
654 	 * (a range of the source file tn1_off, tn1_eof).  Make sure
655 	 * in_xoff is within the range represented by this token.
656 	 */
657 	src_offset = tn1->tn1_off + args->in_xoff;
658 	if (src_offset >= tn1->tn1_eof ||
659 	    src_offset < tn1->tn1_off) {
660 		status = NT_STATUS_INVALID_PARAMETER;
661 		goto out;
662 	}
663 
664 	/*
665 	 * Get a buffer used for copying, always
666 	 * smb2_odx_buf_size (1M)
667 	 *
668 	 * Rather than sleep for this relatively large allocation,
669 	 * allow the allocation to fail and return an error.
670 	 * The client should then fall back to normal copy.
671 	 */
672 	buffer = kmem_alloc(bufsize, KM_NOSLEEP_LAZY);
673 	if (buffer == NULL) {
674 		status = NT_STATUS_INSUFF_SERVER_RESOURCES;
675 		goto out;
676 	}
677 
678 	/*
679 	 * Copy src to dst for xlen
680 	 */
681 	resid = xlen;
682 	status = smb2_sparse_copy(sr, src_ofile, dst_ofile,
683 	    src_offset, args->in_dstoff, &resid, buffer, bufsize);
684 
685 	/*
686 	 * If the result was a partial copy, round down the
687 	 * reported transfer size to a block boundary.
688 	 */
689 	if (resid != 0) {
690 		xlen -= resid;
691 		xlen &= ~OFFMASK;
692 		args->out_xlen = xlen;
693 	}
694 
695 	/*
696 	 * If we did any I/O, ignore the error that stopped us.
697 	 * We'll report this error during the next call.
698 	 */
699 	if (args->out_xlen > 0)
700 		status = 0;
701 
702 out:
703 	if (src_ofile != NULL)
704 		smb_ofile_release(src_ofile);
705 
706 	if (buffer != NULL)
707 		kmem_free(buffer, bufsize);
708 
709 	return (status);
710 }
711 
712 /*
713  * Get an smb_odx_token_t from the (input) mbuf chain.
714  * Consumes exactly TOKEN_TOTAL_SIZE bytes.
715  */
716 static int
717 smb_odx_get_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
718 {
719 	mbuf_chain_t tok_mbc;
720 	int start_pos = mbc->chain_offset;
721 	int rc;
722 
723 	if (MBC_ROOM_FOR(mbc, TOKEN_TOTAL_SIZE) == 0)
724 		return (-1);
725 
726 	/*
727 	 * No big-endian support in smb_mbc_encodef, so swap
728 	 * the big-endian fields: tok_type (32-bits),
729 	 * (reserved is 16-bit zero, so no swap),
730 	 * and tok_len (16-bits)
731 	 */
732 	rc = smb_mbc_decodef(
733 	    mbc, "l..w",
734 	    &tok->tok_type,
735 	    /* tok_reserved */
736 	    &tok->tok_len);
737 	if (rc != 0)
738 		return (rc);
739 	tok->tok_type = BSWAP_32(tok->tok_type);
740 	tok->tok_len = BSWAP_16(tok->tok_len);
741 
742 	if (tok->tok_len > TOKEN_MAX_PAYLOAD)
743 		return (-1);
744 	rc = MBC_SHADOW_CHAIN(&tok_mbc, mbc,
745 	    mbc->chain_offset, tok->tok_len);
746 	if (rc != 0)
747 		return (rc);
748 
749 	switch (tok->tok_type) {
750 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
751 		/* no payload */
752 		break;
753 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
754 		rc = smb_odx_get_token_native1(&tok_mbc,
755 		    &tok->tok_u.u_tok_native1);
756 		break;
757 	default:
758 		/* caller will error out */
759 		break;
760 	}
761 
762 	if (rc == 0) {
763 		/* Advance past what we shadowed. */
764 		mbc->chain_offset = start_pos + TOKEN_TOTAL_SIZE;
765 	}
766 
767 	return (rc);
768 }
769 
770 static int
771 smb_odx_get_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
772 {
773 	int rc;
774 
775 	rc = smb_mbc_decodef(
776 	    mbc, "qqqq",
777 	    &tn1->tn1_fid.persistent,
778 	    &tn1->tn1_fid.temporal,
779 	    &tn1->tn1_off,
780 	    &tn1->tn1_eof);
781 
782 	return (rc);
783 }
784 
785 /*
786  * Put an smb_odx_token_t into the (output) mbuf chain,
787  * padded to TOKEN_TOTAL_SIZE bytes.
788  */
789 static int
790 smb_odx_put_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
791 {
792 	int rc, padlen;
793 	int start_pos = mbc->chain_offset;
794 	int end_pos = start_pos + TOKEN_TOTAL_SIZE;
795 
796 	if (tok->tok_len > TOKEN_MAX_PAYLOAD)
797 		return (-1);
798 
799 	/*
800 	 * No big-endian support in smb_mbc_encodef, so swap
801 	 * the big-endian fields: tok_type (32-bits),
802 	 * (reserved is 16-bit zero, so no swap),
803 	 * and tok_len (16-bits)
804 	 */
805 	rc = smb_mbc_encodef(
806 	    mbc, "lww",
807 	    BSWAP_32(tok->tok_type),
808 	    0, /* tok_reserved */
809 	    BSWAP_16(tok->tok_len));
810 	if (rc != 0)
811 		return (rc);
812 
813 	switch (tok->tok_type) {
814 	case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
815 		/* no payload */
816 		break;
817 	case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
818 		rc = smb_odx_put_token_native1(mbc,
819 		    &tok->tok_u.u_tok_native1);
820 		break;
821 	default:
822 		ASSERT(0);
823 		return (-1);
824 	}
825 
826 	/* Pad out to TOKEN_TOTAL_SIZE bytes. */
827 	if (mbc->chain_offset < end_pos) {
828 		padlen = end_pos - mbc->chain_offset;
829 		(void) smb_mbc_encodef(mbc, "#.", padlen);
830 	}
831 	ASSERT(mbc->chain_offset == end_pos);
832 
833 	return (rc);
834 }
835 
836 static int
837 smb_odx_put_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
838 {
839 	int rc;
840 
841 	rc = smb_mbc_encodef(
842 	    mbc, "qqqq",
843 	    tn1->tn1_fid.persistent,
844 	    tn1->tn1_fid.temporal,
845 	    tn1->tn1_off,
846 	    tn1->tn1_eof);
847 
848 	return (rc);
849 }
850