1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018-2021 Tintri by DDN, Inc. All rights reserved.
14 * Copyright 2022 RackTop Systems, Inc.
15 */
16
17 /*
18 * Support functions for smb2_ioctl/fsctl codes:
19 * FSCTL_OFFLOAD_READ
20 * FSCTL_OFFLOAD_WRITE
21 * (and related)
22 */
23
24 #include <smbsrv/smb2_kproto.h>
25 #include <smbsrv/smb_fsops.h>
26 #include <smb/winioctl.h>
27
28 /*
29 * Summary of how offload data transfer works:
30 *
31 * The client drives a server-side copy. Outline:
32 * 1: open src_file
33 * 2: create dst_file and set its size
34 * 3: while src_file not all copied {
35 * offload_read(src_file, &token);
36 * while token not all copied {
37 * offload_write(dst_file, token);
38 * }
39 * }
40 *
41 * Each "offload read" request returns a "token" representing some
42 * portion of the source file. The server decides what kind of
43 * token to use, and how much of the source file it should cover.
 * The length represented may be less than the client requested.
45 * No data are copied during offload_read (just meta-data).
46 *
47 * Each "offload write" request copies some portion of the data
48 * represented by the "token" into the output file. The amount
49 * of data copied may be less than the client requested, and the
50 * client keeps sending offload write requests until they have
51 * copied all the data represented by the current token.
52 */
53
/* [MS-FSA] OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND_CURRENT_RANGE */
#define	OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND	1

/*
 * Token type values.  See [MS-FSCC] 2.3.79 STORAGE_OFFLOAD_TOKEN.
 * The range 0xFFFF0002 - 0xFFFFFFFF is reserved.
 *
 * ...TOKEN_TYPE_ZERO_DATA is the well-known token telling offload
 * write to simply zero the destination range.  The token payload
 * (tok_other) is ignored for this type.
 */
#define	STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA	0xFFFF0001

/* Our vendor-specific token type; the payload is a struct tok_native1. */
#define	STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1	0x10001

/*
 * Wire size of a token, and the room left for the payload after the
 * 8-byte header (type, reserved, length).
 */
#define	TOKEN_TOTAL_SIZE	512
#define	TOKEN_MAX_PAYLOAD	(TOKEN_TOTAL_SIZE - 8)	/* 504 */

/* Low bits of a disk block; used to sanity check offsets and lengths. */
#define	OFFMASK	((uint64_t)DEV_BSIZE - 1)
75
76 typedef struct smb_odx_token {
77 uint32_t tok_type; /* big-endian on the wire */
78 uint16_t tok_reserved; /* zero */
79 uint16_t tok_len; /* big-endian on the wire */
80 union {
81 uint8_t u_tok_other[TOKEN_MAX_PAYLOAD];
82 struct tok_native1 {
83 smb2fid_t tn1_fid;
84 uint64_t tn1_off;
85 uint64_t tn1_eof;
86 uint32_t tn1_tid;
87 } u_tok_native1;
88 } tok_u;
89 } smb_odx_token_t;
90
/*
 * Working state for one FSCTL_OFFLOAD_WRITE call.  The in_* members
 * are decoded from the request, the out_* members are encoded into
 * the reply, and wa_eof is the destination size sampled before I/O.
 */
typedef struct odx_write_args {
	/* Decoded from FSCTL_OFFLOAD_WRITE_INPUT */
	uint32_t	in_struct_size;
	uint32_t	in_flags;
	uint64_t	in_dstoff;	/* offset in the destination file */
	uint64_t	in_xlen;	/* requested transfer length */
	uint64_t	in_xoff;	/* offset relative to the token start */

	/* Encoded into the reply */
	uint32_t	out_struct_size;
	uint32_t	out_flags;
	uint64_t	out_xlen;	/* length actually transferred */

	/* Destination file size at the start of the request */
	uint64_t	wa_eof;
} odx_write_args_t;
102
103 static int smb_odx_get_token(mbuf_chain_t *, smb_odx_token_t *);
104 static int smb_odx_get_token_native1(mbuf_chain_t *, struct tok_native1 *);
105 static int smb_odx_put_token(mbuf_chain_t *, smb_odx_token_t *);
106 static int smb_odx_put_token_native1(mbuf_chain_t *, struct tok_native1 *);
107
108 static uint32_t smb2_fsctl_odx_write_zeros(smb_request_t *, odx_write_args_t *);
109 static uint32_t smb2_fsctl_odx_write_native1(smb_request_t *,
110 odx_write_args_t *, smb_odx_token_t *);
111
112
/* Set to zero to disable ODX (for testing etc.) */
int smb2_odx_enable = 1;

/*
 * Strides used during an offload copy:
 *
 * smb2_odx_read_max bounds the range one offload_read token covers.
 * A token could describe the whole file, but making the client come
 * back every 256M gives us a chance to look for "holes".  While in an
 * un-allocated part of the file we hand out the special "zero" token,
 * so offload_write can use the (more efficient) smb_fsop_freesp
 * instead of copying.
 *
 * smb2_odx_write_max bounds the work done by one offload_write that
 * really copies data, so the I/O does not take so long that the
 * client might time out the request.
 *
 * Keep: write_max <= read_max
 */
uint32_t smb2_odx_read_max = (1U << 28);	/* 256M */
uint32_t smb2_odx_write_max = (1U << 24);	/* 16M */

/*
 * Size of the bounce buffer used for the read/write copy loop during
 * offload write.  We kmem_alloc this, so don't make it HUGE; it only
 * needs to be large enough for the copy to proceed with reasonable
 * efficiency.  Although 1M is currently the largest possible block
 * size with ZFS, this is limited to kmem_max_cached to avoid
 * contention allocating from kmem_oversize_arena.
 */
uint32_t smb2_odx_buf_size = (1U << 17);	/* 128k */
146
147
148 /*
149 * FSCTL_OFFLOAD_READ
150 * [MS-FSCC] 2.3.77
151 *
152 * Similar (in concept) to FSCTL_SRV_REQUEST_RESUME_KEY
153 *
154 * The returned data is an (opaque to the client) 512-byte "token"
155 * that represents the specified range (offset, length) of the
 * source file.  The "token" we return here comes back to us in an
 * FSCTL_OFFLOAD_WRITE.  We must stash whatever we'll need then in
 * the token we return here.
159 *
160 * We want server-side copy to be able to copy "holes" efficiently,
161 * but would rather avoid the complexity of encoding a list of all
162 * allocated ranges into our returned token, so this compromise:
163 *
164 * When the current range is entirely within a "hole", we'll return
165 * the special "zeros" token, and the offload write using that token
166 * will use the simple and very efficient smb_fsop_freesp. In this
167 * scenario, we'll have a copy stride of smb2_odx_read_max (256M).
168 *
169 * When there's any data in the range to copy, we'll return our
170 * "native" token, and the subsequent offload_write will walk the
171 * allocated ranges copying and/or zeroing as needed. In this
172 * scenario, we'll have a copy stride of smb2_odx_write_max (16M).
173 *
174 * One additional optimization allowed by the protocol is that when
175 * we discover that there's no more data after the current range,
 * we can set the flag ..._ALL_ZERO_BEYOND which tells the client
 * they can stop copying here if they like.
178 */
179 uint32_t
smb2_fsctl_odx_read(smb_request_t * sr,smb_fsctl_t * fsctl)180 smb2_fsctl_odx_read(smb_request_t *sr, smb_fsctl_t *fsctl)
181 {
182 smb_attr_t src_attr;
183 smb_odx_token_t *tok = NULL;
184 struct tok_native1 *tn1;
185 smb_ofile_t *ofile = sr->fid_ofile;
186 uint64_t src_size, src_rnd_size;
187 off64_t data, hole;
188 uint32_t in_struct_size;
189 uint32_t in_flags;
190 uint32_t in_ttl;
191 uint64_t in_file_off;
192 uint64_t in_copy_len;
193 uint64_t out_xlen;
194 uint32_t out_struct_size = TOKEN_TOTAL_SIZE + 16;
195 uint32_t out_flags = 0;
196 uint32_t status;
197 uint32_t tok_type;
198 int rc;
199
200 if (smb2_odx_enable == 0)
201 return (NT_STATUS_INVALID_DEVICE_REQUEST);
202
203 /*
204 * Make sure the (src) ofile granted access allows read.
205 * [MS-FSA] didn't mention this, so it's not clear where
206 * this should happen relative to other checks. Usually
207 * access checks happen early.
208 */
209 status = smb_ofile_access(ofile, ofile->f_cr, FILE_READ_DATA);
210 if (status != NT_STATUS_SUCCESS)
211 return (status);
212
213 /*
214 * Decode FSCTL_OFFLOAD_READ_INPUT struct,
215 * and do in/out size checks.
216 */
217 rc = smb_mbc_decodef(
218 fsctl->in_mbc, "lll4.qq",
219 &in_struct_size, /* l */
220 &in_flags, /* l */
221 &in_ttl, /* l */
222 /* reserved 4. */
223 &in_file_off, /* q */
224 &in_copy_len); /* q */
225 if (rc != 0)
226 return (NT_STATUS_BUFFER_TOO_SMALL);
227 if (fsctl->MaxOutputResp < out_struct_size)
228 return (NT_STATUS_BUFFER_TOO_SMALL);
229
230 /*
231 * More arg checking per MS-FSA
232 */
233 if ((in_file_off & OFFMASK) != 0 ||
234 (in_copy_len & OFFMASK) != 0)
235 return (NT_STATUS_INVALID_PARAMETER);
236 if (in_struct_size != 32)
237 return (NT_STATUS_INVALID_PARAMETER);
238 if (in_file_off > INT64_MAX ||
239 (in_file_off + in_copy_len) < in_file_off)
240 return (NT_STATUS_INVALID_PARAMETER);
241
242 /*
243 * [MS-FSA] (summarizing)
244 * If not data stream, or if sparse, encrypted, compressed...
245 * return STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED.
246 *
247 * We'll ignore most of those except to require:
248 * Plain file, not a stream.
249 */
250 if (!smb_node_is_file(ofile->f_node))
251 return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
252 if (SMB_IS_STREAM(ofile->f_node))
253 return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED);
254
255 /*
256 * [MS-FSA] If Open.Stream.IsDeleted ...
257 */
258 if (ofile->f_node->flags & NODE_FLAGS_DELETE_COMMITTED)
259 return (NT_STATUS_FILE_DELETED);
260
261 /*
262 * If CopyLength == 0, "return immediately success".
263 */
264 if (in_copy_len == 0) {
265 out_xlen = 0;
266 tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
267 goto done;
268 }
269
270 /*
271 * Check for lock conflicting with the read.
272 */
273 status = smb_lock_range_access(sr, ofile->f_node,
274 in_file_off, in_copy_len, B_FALSE);
275 if (status != 0)
276 return (status); /* == FILE_LOCK_CONFLICT */
277
278 /*
279 * Get the file size (rounded to a full block)
280 * and check the requested offset.
281 */
282 bzero(&src_attr, sizeof (src_attr));
283 src_attr.sa_mask = SMB_AT_SIZE;
284 status = smb2_ofile_getattr(sr, ofile, &src_attr);
285 if (status != NT_STATUS_SUCCESS)
286 return (status);
287 src_size = src_attr.sa_vattr.va_size;
288 if (in_file_off >= src_size)
289 return (NT_STATUS_END_OF_FILE);
290
291 /*
292 * Limit the transfer length based on (rounded) EOF.
293 * Clients expect ranges of whole disk blocks.
294 * If we get a read in this rounded-up range,
295 * we'll supply zeros.
296 */
297 src_rnd_size = (src_size + OFFMASK) & ~OFFMASK;
298 out_xlen = in_copy_len;
299 if ((in_file_off + out_xlen) > src_rnd_size)
300 out_xlen = src_rnd_size - in_file_off;
301
302 /*
303 * Also, have the client come back for a new token after every
304 * smb2_odx_read_max bytes, so we'll have opportunities to
305 * recognize "holes" in the source file.
306 */
307 if (out_xlen > smb2_odx_read_max)
308 out_xlen = smb2_odx_read_max;
309
310 /*
311 * Ask the filesystem if there are any allocated regions in
312 * the requested range, and return either the "zeros" token
313 * or our "native" token as appropriate (details above).
314 */
315 data = in_file_off;
316 tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1;
317 rc = smb_fsop_next_alloc_range(ofile->f_cr, ofile->f_node,
318 &data, &hole);
319 switch (rc) {
320 case 0:
321 /* Found some data. Is it beyond this range? */
322 if (data >= (in_file_off + out_xlen))
323 tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
324 break;
325 case ENXIO:
326 /*
327 * No data here to EOF. Use TOKEN_TYPE_ZERO_DATA,
328 * but only if we're not crossing src_size, because
329 * type zero cannot preserve unaligned src_size.
330 */
331 if ((in_file_off + out_xlen) <= src_size)
332 tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA;
333 out_flags |= OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND;
334 break;
335 case ENOSYS: /* FS does not support VOP_IOCTL... */
336 case ENOTTY: /* ... or _FIO_SEEK_DATA, _HOLE */
337 break;
338 default:
339 cmn_err(CE_NOTE, "smb_fsop_next_alloc_range: rc=%d", rc);
340 break;
341 }
342
343 done:
344 /* Already checked MaxOutputResp */
345 (void) smb_mbc_encodef(
346 fsctl->out_mbc, "llq",
347 out_struct_size, /* l */
348 out_flags, /* l */
349 out_xlen); /* q */
350
351 /*
352 * Build the ODX token to return
353 */
354 tok = smb_srm_zalloc(sr, sizeof (*tok));
355 tok->tok_type = tok_type;
356 tok->tok_reserved = 0;
357 if (tok_type == STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1) {
358 tok->tok_len = sizeof (*tn1);
359 tn1 = &tok->tok_u.u_tok_native1;
360 tn1->tn1_fid.persistent = ofile->f_persistid;
361 tn1->tn1_fid.temporal = ofile->f_fid;
362 tn1->tn1_off = in_file_off;
363 tn1->tn1_eof = src_size;
364 tn1->tn1_tid = sr->smb_tid;
365 }
366
367 rc = smb_odx_put_token(fsctl->out_mbc, tok);
368 if (rc != 0)
369 return (NT_STATUS_BUFFER_TOO_SMALL);
370
371 return (NT_STATUS_SUCCESS);
372 }
373
374 /*
375 * FSCTL_OFFLOAD_WRITE
376 * [MS-FSCC] 2.3.80
377 *
378 * Similar (in concept) to FSCTL_COPYCHUNK_WRITE
379 *
380 * Copies from a source file identified by a "token"
381 * (previously returned by FSCTL_OFFLOAD_READ)
382 * to the file on which the ioctl is issued.
383 */
384 uint32_t
smb2_fsctl_odx_write(smb_request_t * sr,smb_fsctl_t * fsctl)385 smb2_fsctl_odx_write(smb_request_t *sr, smb_fsctl_t *fsctl)
386 {
387 smb_attr_t dst_attr;
388 odx_write_args_t args;
389 smb_odx_token_t *tok = NULL;
390 smb_ofile_t *ofile = sr->fid_ofile;
391 uint32_t status = NT_STATUS_INVALID_PARAMETER;
392 int rc;
393
394 bzero(&args, sizeof (args));
395 args.out_struct_size = 16;
396
397 if (smb2_odx_enable == 0)
398 return (NT_STATUS_INVALID_DEVICE_REQUEST);
399
400 /*
401 * Make sure the (dst) ofile granted_access allows write.
402 * [MS-FSA] didn't mention this, so it's not clear where
403 * this should happen relative to other checks. Usually
404 * access checks happen early.
405 */
406 status = smb_ofile_access(ofile, ofile->f_cr, FILE_WRITE_DATA);
407 if (status != NT_STATUS_SUCCESS)
408 return (status);
409
410 /*
411 * Decode FSCTL_OFFLOAD_WRITE_INPUT struct,
412 * and do in/out size checks.
413 */
414 rc = smb_mbc_decodef(
415 fsctl->in_mbc, "llqqq",
416 &args.in_struct_size, /* l */
417 &args.in_flags, /* l */
418 &args.in_dstoff, /* q */
419 &args.in_xlen, /* q */
420 &args.in_xoff); /* q */
421 if (rc != 0)
422 return (NT_STATUS_BUFFER_TOO_SMALL);
423 tok = smb_srm_zalloc(sr, sizeof (*tok));
424 rc = smb_odx_get_token(fsctl->in_mbc, tok);
425 if (rc != 0)
426 return (NT_STATUS_BUFFER_TOO_SMALL);
427 if (fsctl->MaxOutputResp < args.out_struct_size)
428 return (NT_STATUS_BUFFER_TOO_SMALL);
429
430 /*
431 * More arg checking per MS-FSA
432 */
433 if ((args.in_dstoff & OFFMASK) != 0 ||
434 (args.in_xoff & OFFMASK) != 0 ||
435 (args.in_xlen & OFFMASK) != 0)
436 return (NT_STATUS_INVALID_PARAMETER);
437 if (args.in_struct_size != (TOKEN_TOTAL_SIZE + 32))
438 return (NT_STATUS_INVALID_PARAMETER);
439 if (args.in_dstoff > INT64_MAX ||
440 (args.in_dstoff + args.in_xlen) < args.in_dstoff)
441 return (NT_STATUS_INVALID_PARAMETER);
442
443 /*
444 * If CopyLength == 0, "return immediately success".
445 */
446 if (args.in_xlen == 0) {
447 status = 0;
448 goto done;
449 }
450
451 /*
452 * [MS-FSA] (summarizing)
453 * If not data stream, or if sparse, encrypted, compressed...
454 * return STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED.
455 *
456 * We'll ignore most of those except to require:
457 * Plain file, not a stream.
458 */
459 if (!smb_node_is_file(ofile->f_node))
460 return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
461 if (SMB_IS_STREAM(ofile->f_node))
462 return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
463
464 /*
465 * [MS-FSA] If Open.Stream.IsDeleted ...
466 */
467 if (ofile->f_node->flags & NODE_FLAGS_DELETE_COMMITTED)
468 return (NT_STATUS_FILE_DELETED);
469
470 /*
471 * Check for lock conflicting with the write.
472 */
473 status = smb_lock_range_access(sr, ofile->f_node,
474 args.in_dstoff, args.in_xlen, B_TRUE);
475 if (status != 0)
476 return (status); /* == FILE_LOCK_CONFLICT */
477
478 /*
479 * Need the file size
480 */
481 bzero(&dst_attr, sizeof (dst_attr));
482 dst_attr.sa_mask = SMB_AT_SIZE;
483 status = smb2_ofile_getattr(sr, ofile, &dst_attr);
484 if (status != NT_STATUS_SUCCESS)
485 return (status);
486 args.wa_eof = dst_attr.sa_vattr.va_size;
487
488 /*
489 * Destination offset vs. EOF
490 */
491 if (args.in_dstoff > args.wa_eof)
492 return (NT_STATUS_END_OF_FILE);
493
494 /*
495 * Finally, run the I/O
496 */
497 switch (tok->tok_type) {
498 case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
499 status = smb2_fsctl_odx_write_zeros(sr, &args);
500 break;
501 case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
502 status = smb2_fsctl_odx_write_native1(sr, &args, tok);
503 break;
504 default:
505 status = NT_STATUS_INVALID_TOKEN;
506 break;
507 }
508
509 done:
510 /*
511 * Checked MaxOutputResp above, so we can ignore errors
512 * from mbc_encodef here.
513 */
514 if (status == NT_STATUS_SUCCESS) {
515 (void) smb_mbc_encodef(
516 fsctl->out_mbc, "llq",
517 args.out_struct_size,
518 args.out_flags,
519 args.out_xlen);
520 }
521
522 return (status);
523 }
524
525 /*
526 * Handle FSCTL_OFFLOAD_WRITE with token type
527 * STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA
528 *
529 * In this handler, the "token" represents a source of zeros,
530 * limited to the range: in_dstoff to (in_dstoff + in_xlen)
531 *
532 * ODX write handlers are allowed to return any transfer amount
533 * less than or equal to the requested size. We want to limit
534 * the amount of I/O "work" we do per ODX write call. Here,
 * we're only doing meta-data operations, so we'll allow up to
 * smb2_odx_read_max (256M) per call.
537 *
538 * The I/O "work" done by this function is to make zeros appear
539 * in the file in the range: in_dstoff, (in_dstoff + in_xlen).
540 * Rather than actually write zeros, we'll use VOP_SPACE to
541 * make "holes" in the file. If any of the range we're asked
542 * to zero out is beyond the destination EOF, we can simply
543 * extend the file length (zeros will appear).
544 *
 * The caller has verified block alignment of:
546 * args->in_dstoff, args->in_xoff, args->in_xlen
547 */
548 static uint32_t
smb2_fsctl_odx_write_zeros(smb_request_t * sr,odx_write_args_t * args)549 smb2_fsctl_odx_write_zeros(smb_request_t *sr, odx_write_args_t *args)
550 {
551 smb_ofile_t *dst_ofile = sr->fid_ofile;
552 uint64_t xlen;
553 int rc;
554
555 ASSERT(args->in_xlen > 0);
556 args->out_xlen = 0;
557
558 /*
559 * Limit the I/O size. (per above)
560 */
561 if (args->in_xlen > smb2_odx_read_max)
562 args->in_xlen = smb2_odx_read_max;
563
564 /*
565 * Handle the part below destination EOF.
566 * (in_dstoff to wa_eof).
567 */
568 if (args->in_dstoff < args->wa_eof) {
569 xlen = args->in_xlen;
570 if ((args->in_dstoff + xlen) > args->wa_eof) {
571 xlen = args->wa_eof - args->in_dstoff;
572 ASSERT(xlen < args->in_xlen);
573 }
574 rc = smb_fsop_freesp(sr, dst_ofile->f_cr, dst_ofile,
575 args->in_dstoff, xlen);
576 if (rc != 0) {
577 /* Let client fall-back to normal copy. */
578 return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED);
579 }
580 }
581
582 /*
583 * Now the part after destination EOF, if any.
584 * Just set the file size.
585 */
586 if ((args->in_dstoff + args->in_xlen) > args->wa_eof) {
587 smb_attr_t attr;
588
589 bzero(&attr, sizeof (smb_attr_t));
590 attr.sa_mask = SMB_AT_SIZE;
591 attr.sa_vattr.va_size = args->in_dstoff + args->in_xlen;
592
593 rc = smb_node_setattr(sr, dst_ofile->f_node,
594 dst_ofile->f_cr, dst_ofile, &attr);
595 if (rc != 0) {
596 return (smb_errno2status(rc));
597 }
598 }
599
600 args->out_xlen = args->in_xlen;
601
602 return (0);
603 }
604
605 /*
606 * Handle FSCTL_OFFLOAD_WRITE with token type
607 * STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1
608 *
609 * For this handler, the token represents a valid range in the
610 * source file (tn1_off to tn1_eof). The token contains enough
611 * information for us to find the tree and file handle that the
612 * client has open on the source file for this copy.
613 *
614 * ODX write handlers are allowed to return any transfer amount
615 * less than or equal to the requested size. We want to limit
616 * the amount of I/O "work" we do per ODX write call. Here,
617 * we're actually copying from another file, so limit transfers
618 * to smb2_odx_write_max (16M) per call.
619 *
620 * Copying past un-aligned end of source file:
621 *
622 * The MS-FSA spec. is silent about copying when the file length is
623 * not block aligned. Clients normally request copying a range that's
624 * the file size rounded up to a block boundary, and expect that copy
625 * to extend the destination as long as the copy has not crossed the
626 * EOF in the source file. This means that the last block we copy
627 * will generally be a partial copy, where the first part comes from
 * the source file, and the remainder is either zeros or truncated.
629 *
630 * Extending the destination file:
631 *
632 * With a whole file copy, we want the destination file length to
633 * match the source file length, even if it's not block aligned.
634 * We could just never extend the destination file, but there are
635 * WPTS tests that prove that ODX write IS supposed to extend the
636 * destination file when appropriate. This is solved by having
637 * this write handler extend the destination file as long as the
 * copy has not yet crossed EOF in the source file.  After we've
 * passed the source EOF with copying, we'll zero out the remainder
640 * of the block in which the copy stopped, stopping at either the
641 * end of the block or the end of the destination file, whichever
642 * comes first. This guarantees that a future read anywhere in
643 * that range will see either data from the source file or zeros.
644 *
645 * Note that no matter which way we stopped copying, we MUST
646 * return a block-aligned transfer size in our response.
 * The caller has verified block alignment of:
648 * args->in_dstoff, args->in_xoff, args->in_xlen
649 */
650 static uint32_t
smb2_fsctl_odx_write_native1(smb_request_t * sr,odx_write_args_t * args,smb_odx_token_t * tok)651 smb2_fsctl_odx_write_native1(smb_request_t *sr,
652 odx_write_args_t *args, smb_odx_token_t *tok)
653 {
654 struct tok_native1 *tn1;
655 smb_ofile_t *dst_ofile = sr->fid_ofile;
656 smb_ofile_t *src_ofile = NULL;
657 void *buffer = NULL;
658 size_t bufsize = smb2_odx_buf_size;
659 uint64_t src_offset;
660 uint32_t resid;
661 uint32_t xlen;
662 uint32_t status;
663
664 ASSERT(args->in_xlen > 0);
665 args->out_xlen = 0;
666
667 /*
668 * Limit the I/O size. (per above)
669 */
670 if (args->in_xlen > smb2_odx_write_max)
671 args->in_xlen = smb2_odx_write_max;
672
673 /*
674 * Lookup the source ofile using the "token".
675 */
676 tn1 = &tok->tok_u.u_tok_native1;
677
678 /*
679 * If the source ofile came from another tree, we need to
680 * get the other tree and use it for the fid lookup.
681 * Do that by temporarily changing sr->tid_tree around
682 * the call to smb_ofile_lookup_by_fid().
683 */
684 if (tn1->tn1_tid != sr->smb_tid) {
685 smb_tree_t *saved_tree;
686 smb_tree_t *src_tree;
687
688 src_tree = smb_session_lookup_tree(sr->session,
689 (uint16_t)tn1->tn1_tid);
690 if (src_tree == NULL) {
691 status = NT_STATUS_INVALID_TOKEN;
692 goto out;
693 }
694
695 saved_tree = sr->tid_tree;
696 sr->tid_tree = src_tree;
697
698 src_ofile = smb_ofile_lookup_by_fid(sr,
699 (uint16_t)tn1->tn1_fid.temporal);
700
701 sr->tid_tree = saved_tree;
702 smb_tree_release(src_tree);
703 } else {
704 src_ofile = smb_ofile_lookup_by_fid(sr,
705 (uint16_t)tn1->tn1_fid.temporal);
706 }
707
708 if (src_ofile == NULL ||
709 src_ofile->f_persistid != tn1->tn1_fid.persistent) {
710 status = NT_STATUS_INVALID_TOKEN;
711 goto out;
712 }
713
714 /*
715 * Make sure src_ofile is open on a regular file, and
716 * granted access includes READ_DATA. These were all
717 * validated in ODX READ, so if these checks fail it
718 * means somebody messed with the token or something.
719 */
720 if (!smb_node_is_file(src_ofile->f_node)) {
721 status = NT_STATUS_ACCESS_DENIED;
722 goto out;
723 }
724 status = smb_ofile_access(src_ofile, src_ofile->f_cr, FILE_READ_DATA);
725 if (status != NT_STATUS_SUCCESS)
726 goto out;
727
728 /*
729 * Get a buffer used for copying, always smb2_odx_buf_size
730 *
731 * Rather than sleep for this relatively large allocation,
732 * allow the allocation to fail and return an error.
733 * The client should then fall back to normal copy.
734 */
735 buffer = kmem_alloc(bufsize, KM_NOSLEEP_LAZY);
736 if (buffer == NULL) {
737 status = NT_STATUS_INSUFF_SERVER_RESOURCES;
738 goto out;
739 }
740
741 /*
742 * Note: in_xoff is relative to the beginning of the "token"
743 * (a range of the source file tn1_off, tn1_eof). Make sure
744 * in_xoff is within the range represented by this token.
745 */
746 src_offset = tn1->tn1_off + args->in_xoff;
747 if (src_offset >= tn1->tn1_eof ||
748 src_offset < tn1->tn1_off) {
749 status = NT_STATUS_INVALID_PARAMETER;
750 goto out;
751 }
752
753 /*
754 * Source offset+len vs. source EOF (see top comment)
755 */
756 xlen = (uint32_t)args->in_xlen;
757 if ((src_offset + xlen) > tn1->tn1_eof) {
758 /*
759 * Copying would pass tn1_eof. Reduce xlen.
760 */
761 DTRACE_PROBE3(crossed__eof, smb_request_t *, sr,
762 odx_write_args_t *, args, smb_odx_token_t *, tok);
763 xlen = (uint32_t)(tn1->tn1_eof - src_offset);
764 }
765
766 /*
767 * Copy src to dst for xlen. This MAY extend the dest file.
768 * Note: xlen may be not block-aligned now. Handled below.
769 */
770 resid = xlen;
771 status = smb2_sparse_copy(sr, src_ofile, dst_ofile,
772 src_offset, args->in_dstoff, &resid, buffer, bufsize);
773
774 /*
775 * If the result was a partial copy, round down the reported
776 * transfer size to a block boundary. If we moved any data,
777 * suppress errors on this call. If an error was suppressed,
778 * it will happen again and be returned on the next call.
779 */
780 if (status != 0 || resid != 0) {
781 xlen -= resid;
782 xlen &= ~OFFMASK;
783 args->out_xlen = xlen;
784 /* If we moved any data, suppress errors. */
785 if (xlen > 0)
786 status = 0;
787 goto out;
788 }
789
790 /*
791 * If the copying covered the whole in_xlen, we're done.
792 * The test is >= here just so we can guarantee < below.
793 */
794 if (xlen >= args->in_xlen) {
795 args->out_xlen = args->in_xlen;
796 goto out;
797 }
798
799 /*
800 * Have: xlen < args->in_xlen
801 *
802 * Here we know xlen was reduced because the copy
803 * crossed the source EOF. See top comment.
804 * Set the rounded-up transfer size now, and
805 * deal with the remainder of the last block.
806 */
807 args->out_xlen = (xlen + OFFMASK) & ~OFFMASK;
808
809 /*
810 * If smb2_sparse_copy passed wa_eof, that means we've
811 * extended the file, so the remainder of the last block
812 * written is beyond the destination EOF was, so there's
813 * no need to zero out the remainder. "We're done".
814 */
815 args->in_dstoff += xlen;
816 if (args->in_dstoff >= args->wa_eof)
817 goto out;
818
819 /*
820 * Have: in_dstoff < wa_eof
821 *
822 * Zero out the unwritten part of the last block that
823 * falls before the destination EOF. (Not extending.)
824 * Here, resid is the length of the part we'll zero.
825 */
826 resid = args->out_xlen - xlen;
827 if ((args->in_dstoff + resid) > args->wa_eof)
828 resid = args->wa_eof - args->in_dstoff;
829 if (resid > 0) {
830 int rc;
831 /*
832 * Zero out in_dstoff to wa_eof.
833 */
834 rc = smb_fsop_freesp(sr, dst_ofile->f_cr, dst_ofile,
835 args->in_dstoff, resid);
836 if (rc != 0) {
837 status = smb_errno2status(rc);
838 }
839 }
840
841 out:
842 if (src_ofile != NULL)
843 smb_ofile_release(src_ofile);
844
845 if (buffer != NULL)
846 kmem_free(buffer, bufsize);
847
848 return (status);
849 }
850
851 /*
852 * Get an smb_odx_token_t from the (input) mbuf chain.
853 * Consumes exactly TOKEN_TOTAL_SIZE bytes.
854 */
855 static int
smb_odx_get_token(mbuf_chain_t * mbc,smb_odx_token_t * tok)856 smb_odx_get_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
857 {
858 mbuf_chain_t tok_mbc;
859 int start_pos = mbc->chain_offset;
860 int rc;
861
862 if (MBC_ROOM_FOR(mbc, TOKEN_TOTAL_SIZE) == 0)
863 return (-1);
864
865 /*
866 * No big-endian support in smb_mbc_encodef, so swap
867 * the big-endian fields: tok_type (32-bits),
868 * (reserved is 16-bit zero, so no swap),
869 * and tok_len (16-bits)
870 */
871 rc = smb_mbc_decodef(
872 mbc, "l..w",
873 &tok->tok_type,
874 /* tok_reserved */
875 &tok->tok_len);
876 if (rc != 0)
877 return (rc);
878 tok->tok_type = BSWAP_32(tok->tok_type);
879 tok->tok_len = BSWAP_16(tok->tok_len);
880
881 if (tok->tok_len > TOKEN_MAX_PAYLOAD)
882 return (-1);
883 rc = MBC_SHADOW_CHAIN(&tok_mbc, mbc,
884 mbc->chain_offset, tok->tok_len);
885 if (rc != 0)
886 return (rc);
887
888 switch (tok->tok_type) {
889 case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
890 /* no payload */
891 break;
892 case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
893 rc = smb_odx_get_token_native1(&tok_mbc,
894 &tok->tok_u.u_tok_native1);
895 break;
896 default:
897 /* caller will error out */
898 break;
899 }
900
901 if (rc == 0) {
902 /* Advance past what we shadowed. */
903 mbc->chain_offset = start_pos + TOKEN_TOTAL_SIZE;
904 }
905
906 return (rc);
907 }
908
909 static int
smb_odx_get_token_native1(mbuf_chain_t * mbc,struct tok_native1 * tn1)910 smb_odx_get_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
911 {
912 int rc;
913
914 rc = smb_mbc_decodef(
915 mbc, "qqqql",
916 &tn1->tn1_fid.persistent,
917 &tn1->tn1_fid.temporal,
918 &tn1->tn1_off,
919 &tn1->tn1_eof,
920 &tn1->tn1_tid);
921
922 return (rc);
923 }
924
925 /*
926 * Put an smb_odx_token_t into the (output) mbuf chain,
927 * padded to TOKEN_TOTAL_SIZE bytes.
928 */
929 static int
smb_odx_put_token(mbuf_chain_t * mbc,smb_odx_token_t * tok)930 smb_odx_put_token(mbuf_chain_t *mbc, smb_odx_token_t *tok)
931 {
932 int rc, padlen;
933 int start_pos = mbc->chain_offset;
934 int end_pos = start_pos + TOKEN_TOTAL_SIZE;
935
936 if (tok->tok_len > TOKEN_MAX_PAYLOAD)
937 return (-1);
938
939 /*
940 * No big-endian support in smb_mbc_encodef, so swap
941 * the big-endian fields: tok_type (32-bits),
942 * (reserved is 16-bit zero, so no swap),
943 * and tok_len (16-bits)
944 */
945 rc = smb_mbc_encodef(
946 mbc, "lww",
947 BSWAP_32(tok->tok_type),
948 0, /* tok_reserved */
949 BSWAP_16(tok->tok_len));
950 if (rc != 0)
951 return (rc);
952
953 switch (tok->tok_type) {
954 case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA:
955 /* no payload */
956 break;
957 case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1:
958 rc = smb_odx_put_token_native1(mbc,
959 &tok->tok_u.u_tok_native1);
960 break;
961 default:
962 ASSERT(0);
963 return (-1);
964 }
965
966 /* Pad out to TOKEN_TOTAL_SIZE bytes. */
967 if (mbc->chain_offset < end_pos) {
968 padlen = end_pos - mbc->chain_offset;
969 (void) smb_mbc_encodef(mbc, "#.", padlen);
970 }
971 ASSERT(mbc->chain_offset == end_pos);
972
973 return (rc);
974 }
975
976 static int
smb_odx_put_token_native1(mbuf_chain_t * mbc,struct tok_native1 * tn1)977 smb_odx_put_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1)
978 {
979 int rc;
980
981 rc = smb_mbc_encodef(
982 mbc, "qqqql",
983 tn1->tn1_fid.persistent,
984 tn1->tn1_fid.temporal,
985 tn1->tn1_off,
986 tn1->tn1_eof,
987 tn1->tn1_tid);
988
989 return (rc);
990 }
991