/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2018-2021 Tintri by DDN, Inc. All rights reserved.
 * Copyright 2022 RackTop Systems, Inc.
 */

/*
 * Support functions for smb2_ioctl/fsctl codes:
 * FSCTL_OFFLOAD_READ
 * FSCTL_OFFLOAD_WRITE
 * (and related)
 */

#include <smbsrv/smb2_kproto.h>
#include <smbsrv/smb_fsops.h>
#include <smb/winioctl.h>

/*
 * Summary of how offload data transfer works:
 *
 * The client drives a server-side copy. Outline:
 * 1: open src_file
 * 2: create dst_file and set its size
 * 3: while src_file not all copied {
 *	offload_read(src_file, &token);
 *	while token not all copied {
 *		offload_write(dst_file, token);
 *	}
 *    }
 *
 * Each "offload read" request returns a "token" representing some
 * portion of the source file. The server decides what kind of
 * token to use, and how much of the source file it should cover.
 * The length represented may be less than the client requested.
 * No data are copied during offload_read (just meta-data).
 *
 * Each "offload write" request copies some portion of the data
 * represented by the "token" into the output file. The amount
 * of data copied may be less than the client requested, and the
 * client keeps sending offload write requests until they have
 * copied all the data represented by the current token.
52 */ 53 54 /* [MS-FSA] OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND_CURRENT_RANGE */ 55 #define OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND 1 56 57 /* 58 * [MS-FSCC] 2.3.79 STORAGE_OFFLOAD_TOKEN 59 * Note reserved: 0xFFFF0002 – 0xFFFFFFFF 60 * 61 * ...TOKEN_TYPE_ZERO_DATA: A well-known Token that indicates ... 62 * (offload write should just zero to the destination) 63 * The payload (tok_other) is ignored with this type. 64 */ 65 #define STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA 0xFFFF0001 66 67 /* Our vendor-specific token type: struct tok_native1 */ 68 #define STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1 0x10001 69 70 #define TOKEN_TOTAL_SIZE 512 71 #define TOKEN_MAX_PAYLOAD 504 /* 512 - 8 */ 72 73 /* This mask is for sanity checking offsets etc. */ 74 #define OFFMASK ((uint64_t)DEV_BSIZE-1) 75 76 typedef struct smb_odx_token { 77 uint32_t tok_type; /* big-endian on the wire */ 78 uint16_t tok_reserved; /* zero */ 79 uint16_t tok_len; /* big-endian on the wire */ 80 union { 81 uint8_t u_tok_other[TOKEN_MAX_PAYLOAD]; 82 struct tok_native1 { 83 smb2fid_t tn1_fid; 84 uint64_t tn1_off; 85 uint64_t tn1_eof; 86 uint32_t tn1_tid; 87 } u_tok_native1; 88 } tok_u; 89 } smb_odx_token_t; 90 91 typedef struct odx_write_args { 92 uint32_t in_struct_size; 93 uint32_t in_flags; 94 uint64_t in_dstoff; 95 uint64_t in_xlen; 96 uint64_t in_xoff; 97 uint32_t out_struct_size; 98 uint32_t out_flags; 99 uint64_t out_xlen; 100 uint64_t wa_eof; 101 } odx_write_args_t; 102 103 static int smb_odx_get_token(mbuf_chain_t *, smb_odx_token_t *); 104 static int smb_odx_get_token_native1(mbuf_chain_t *, struct tok_native1 *); 105 static int smb_odx_put_token(mbuf_chain_t *, smb_odx_token_t *); 106 static int smb_odx_put_token_native1(mbuf_chain_t *, struct tok_native1 *); 107 108 static uint32_t smb2_fsctl_odx_write_zeros(smb_request_t *, odx_write_args_t *); 109 static uint32_t smb2_fsctl_odx_write_native1(smb_request_t *, 110 odx_write_args_t *, smb_odx_token_t *); 111 112 113 /* We can disable this feature for testing etc. 
*/ 114 int smb2_odx_enable = 1; 115 116 /* 117 * These two variables determine the intervals of offload_read and 118 * offload_write calls (respectively) during an offload copy. 119 * 120 * For the offload read token we could offer a token representing 121 * the whole file, but we'll have the client come back for a new 122 * "token" after each 256M so we have a chance to look for "holes". 123 * This lets us use the special "zero" token while we're in any 124 * un-allocated parts of the file, so offload_write can use the 125 * (more efficient) smb_fsop_freesp instead of copying. 126 * 127 * We limit the size of offload_write to 16M per request so we 128 * don't end up taking so long with I/O that the client might 129 * time out the request. Keep: write_max <= read_max 130 */ 131 uint32_t smb2_odx_read_max = (1<<28); /* 256M */ 132 uint32_t smb2_odx_write_max = (1<<24); /* 16M */ 133 134 /* 135 * This buffer size determines the I/O size for the copy during 136 * offoad write, where it will read/write using this buffer. 137 * Note: We kmem_alloc this, so don't make it HUGE. It only 138 * needs to be large enough to allow the copy to proceed with 139 * reasonable efficiency. 1M is currently the largest possible 140 * block size with ZFS, so that's what we'll use here. 141 * 142 * Actually, limit this to kmem_max_cached, to avoid contention 143 * allocating from kmem_oversize_arena. 144 */ 145 uint32_t smb2_odx_buf_size = (1<<17); /* 128k */ 146 147 148 /* 149 * FSCTL_OFFLOAD_READ 150 * [MS-FSCC] 2.3.77 151 * 152 * Similar (in concept) to FSCTL_SRV_REQUEST_RESUME_KEY 153 * 154 * The returned data is an (opaque to the client) 512-byte "token" 155 * that represents the specified range (offset, length) of the 156 * source file. The "token" we return here comes back to us in an 157 * FSCTL_OFFLOAD_READ. We must stash whatever we'll need then in 158 * the token we return here. 
159 * 160 * We want server-side copy to be able to copy "holes" efficiently, 161 * but would rather avoid the complexity of encoding a list of all 162 * allocated ranges into our returned token, so this compromise: 163 * 164 * When the current range is entirely within a "hole", we'll return 165 * the special "zeros" token, and the offload write using that token 166 * will use the simple and very efficient smb_fsop_freesp. In this 167 * scenario, we'll have a copy stride of smb2_odx_read_max (256M). 168 * 169 * When there's any data in the range to copy, we'll return our 170 * "native" token, and the subsequent offload_write will walk the 171 * allocated ranges copying and/or zeroing as needed. In this 172 * scenario, we'll have a copy stride of smb2_odx_write_max (16M). 173 * 174 * One additional optimization allowed by the protocol is that when 175 * we discover that there's no more data after the current range, 176 * we can set the flag ..._ALL_ZERO_BEYOND which tells that client 177 * they can stop copying here if they like. 178 */ 179 uint32_t 180 smb2_fsctl_odx_read(smb_request_t *sr, smb_fsctl_t *fsctl) 181 { 182 smb_attr_t src_attr; 183 smb_odx_token_t *tok = NULL; 184 struct tok_native1 *tn1; 185 smb_ofile_t *ofile = sr->fid_ofile; 186 uint64_t src_size, src_rnd_size; 187 off64_t data, hole; 188 uint32_t in_struct_size; 189 uint32_t in_flags; 190 uint32_t in_ttl; 191 uint64_t in_file_off; 192 uint64_t in_copy_len; 193 uint64_t out_xlen; 194 uint32_t out_struct_size = TOKEN_TOTAL_SIZE + 16; 195 uint32_t out_flags = 0; 196 uint32_t status; 197 uint32_t tok_type; 198 int rc; 199 200 if (smb2_odx_enable == 0) 201 return (NT_STATUS_INVALID_DEVICE_REQUEST); 202 203 /* 204 * Make sure the (src) ofile granted access allows read. 205 * [MS-FSA] didn't mention this, so it's not clear where 206 * this should happen relative to other checks. Usually 207 * access checks happen early. 
208 */ 209 status = smb_ofile_access(ofile, ofile->f_cr, FILE_READ_DATA); 210 if (status != NT_STATUS_SUCCESS) 211 return (status); 212 213 /* 214 * Decode FSCTL_OFFLOAD_READ_INPUT struct, 215 * and do in/out size checks. 216 */ 217 rc = smb_mbc_decodef( 218 fsctl->in_mbc, "lll4.qq", 219 &in_struct_size, /* l */ 220 &in_flags, /* l */ 221 &in_ttl, /* l */ 222 /* reserved 4. */ 223 &in_file_off, /* q */ 224 &in_copy_len); /* q */ 225 if (rc != 0) 226 return (NT_STATUS_BUFFER_TOO_SMALL); 227 if (fsctl->MaxOutputResp < out_struct_size) 228 return (NT_STATUS_BUFFER_TOO_SMALL); 229 230 /* 231 * More arg checking per MS-FSA 232 */ 233 if ((in_file_off & OFFMASK) != 0 || 234 (in_copy_len & OFFMASK) != 0) 235 return (NT_STATUS_INVALID_PARAMETER); 236 if (in_struct_size != 32) 237 return (NT_STATUS_INVALID_PARAMETER); 238 if (in_file_off > INT64_MAX || 239 (in_file_off + in_copy_len) < in_file_off) 240 return (NT_STATUS_INVALID_PARAMETER); 241 242 /* 243 * [MS-FSA] (summarizing) 244 * If not data stream, or if sparse, encrypted, compressed... 245 * return STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED. 246 * 247 * We'll ignore most of those except to require: 248 * Plain file, not a stream. 249 */ 250 if (!smb_node_is_file(ofile->f_node)) 251 return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED); 252 if (SMB_IS_STREAM(ofile->f_node)) 253 return (NT_STATUS_OFFLOAD_READ_FILE_NOT_SUPPORTED); 254 255 /* 256 * [MS-FSA] If Open.Stream.IsDeleted ... 257 */ 258 if (ofile->f_node->flags & NODE_FLAGS_DELETE_COMMITTED) 259 return (NT_STATUS_FILE_DELETED); 260 261 /* 262 * If CopyLength == 0, "return immediately success". 263 */ 264 if (in_copy_len == 0) { 265 out_xlen = 0; 266 tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA; 267 goto done; 268 } 269 270 /* 271 * Check for lock conflicting with the read. 
272 */ 273 status = smb_lock_range_access(sr, ofile->f_node, 274 in_file_off, in_copy_len, B_FALSE); 275 if (status != 0) 276 return (status); /* == FILE_LOCK_CONFLICT */ 277 278 /* 279 * Get the file size (rounded to a full block) 280 * and check the requested offset. 281 */ 282 bzero(&src_attr, sizeof (src_attr)); 283 src_attr.sa_mask = SMB_AT_SIZE; 284 status = smb2_ofile_getattr(sr, ofile, &src_attr); 285 if (status != NT_STATUS_SUCCESS) 286 return (status); 287 src_size = src_attr.sa_vattr.va_size; 288 if (in_file_off >= src_size) 289 return (NT_STATUS_END_OF_FILE); 290 291 /* 292 * Limit the transfer length based on (rounded) EOF. 293 * Clients expect ranges of whole disk blocks. 294 * If we get a read in this rounded-up range, 295 * we'll supply zeros. 296 */ 297 src_rnd_size = (src_size + OFFMASK) & ~OFFMASK; 298 out_xlen = in_copy_len; 299 if ((in_file_off + out_xlen) > src_rnd_size) 300 out_xlen = src_rnd_size - in_file_off; 301 302 /* 303 * Also, have the client come back for a new token after every 304 * smb2_odx_read_max bytes, so we'll have opportunities to 305 * recognize "holes" in the source file. 306 */ 307 if (out_xlen > smb2_odx_read_max) 308 out_xlen = smb2_odx_read_max; 309 310 /* 311 * Ask the filesystem if there are any allocated regions in 312 * the requested range, and return either the "zeros" token 313 * or our "native" token as appropriate (details above). 314 */ 315 data = in_file_off; 316 tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1; 317 rc = smb_fsop_next_alloc_range(ofile->f_cr, ofile->f_node, 318 &data, &hole); 319 switch (rc) { 320 case 0: 321 /* Found some data. Is it beyond this range? */ 322 if (data >= (in_file_off + out_xlen)) 323 tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA; 324 break; 325 case ENXIO: 326 /* 327 * No data here to EOF. Use TOKEN_TYPE_ZERO_DATA, 328 * but only if we're not crossing src_size, because 329 * type zero cannot preserve unaligned src_size. 
330 */ 331 if ((in_file_off + out_xlen) <= src_size) 332 tok_type = STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA; 333 out_flags |= OFFLOAD_READ_FLAG_ALL_ZERO_BEYOND; 334 break; 335 case ENOSYS: /* FS does not support VOP_IOCTL... */ 336 case ENOTTY: /* ... or _FIO_SEEK_DATA, _HOLE */ 337 break; 338 default: 339 cmn_err(CE_NOTE, "smb_fsop_next_alloc_range: rc=%d", rc); 340 break; 341 } 342 343 done: 344 /* Already checked MaxOutputResp */ 345 (void) smb_mbc_encodef( 346 fsctl->out_mbc, "llq", 347 out_struct_size, /* l */ 348 out_flags, /* l */ 349 out_xlen); /* q */ 350 351 /* 352 * Build the ODX token to return 353 */ 354 tok = smb_srm_zalloc(sr, sizeof (*tok)); 355 tok->tok_type = tok_type; 356 tok->tok_reserved = 0; 357 if (tok_type == STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1) { 358 tok->tok_len = sizeof (*tn1); 359 tn1 = &tok->tok_u.u_tok_native1; 360 tn1->tn1_fid.persistent = ofile->f_persistid; 361 tn1->tn1_fid.temporal = ofile->f_fid; 362 tn1->tn1_off = in_file_off; 363 tn1->tn1_eof = src_size; 364 tn1->tn1_tid = sr->smb_tid; 365 } 366 367 rc = smb_odx_put_token(fsctl->out_mbc, tok); 368 if (rc != 0) 369 return (NT_STATUS_BUFFER_TOO_SMALL); 370 371 return (NT_STATUS_SUCCESS); 372 } 373 374 /* 375 * FSCTL_OFFLOAD_WRITE 376 * [MS-FSCC] 2.3.80 377 * 378 * Similar (in concept) to FSCTL_COPYCHUNK_WRITE 379 * 380 * Copies from a source file identified by a "token" 381 * (previously returned by FSCTL_OFFLOAD_READ) 382 * to the file on which the ioctl is issued. 383 */ 384 uint32_t 385 smb2_fsctl_odx_write(smb_request_t *sr, smb_fsctl_t *fsctl) 386 { 387 smb_attr_t dst_attr; 388 odx_write_args_t args; 389 smb_odx_token_t *tok = NULL; 390 smb_ofile_t *ofile = sr->fid_ofile; 391 uint32_t status = NT_STATUS_INVALID_PARAMETER; 392 int rc; 393 394 bzero(&args, sizeof (args)); 395 args.out_struct_size = 16; 396 397 if (smb2_odx_enable == 0) 398 return (NT_STATUS_INVALID_DEVICE_REQUEST); 399 400 /* 401 * Make sure the (dst) ofile granted_access allows write. 
402 * [MS-FSA] didn't mention this, so it's not clear where 403 * this should happen relative to other checks. Usually 404 * access checks happen early. 405 */ 406 status = smb_ofile_access(ofile, ofile->f_cr, FILE_WRITE_DATA); 407 if (status != NT_STATUS_SUCCESS) 408 return (status); 409 410 /* 411 * Decode FSCTL_OFFLOAD_WRITE_INPUT struct, 412 * and do in/out size checks. 413 */ 414 rc = smb_mbc_decodef( 415 fsctl->in_mbc, "llqqq", 416 &args.in_struct_size, /* l */ 417 &args.in_flags, /* l */ 418 &args.in_dstoff, /* q */ 419 &args.in_xlen, /* q */ 420 &args.in_xoff); /* q */ 421 if (rc != 0) 422 return (NT_STATUS_BUFFER_TOO_SMALL); 423 tok = smb_srm_zalloc(sr, sizeof (*tok)); 424 rc = smb_odx_get_token(fsctl->in_mbc, tok); 425 if (rc != 0) 426 return (NT_STATUS_BUFFER_TOO_SMALL); 427 if (fsctl->MaxOutputResp < args.out_struct_size) 428 return (NT_STATUS_BUFFER_TOO_SMALL); 429 430 /* 431 * More arg checking per MS-FSA 432 */ 433 if ((args.in_dstoff & OFFMASK) != 0 || 434 (args.in_xoff & OFFMASK) != 0 || 435 (args.in_xlen & OFFMASK) != 0) 436 return (NT_STATUS_INVALID_PARAMETER); 437 if (args.in_struct_size != (TOKEN_TOTAL_SIZE + 32)) 438 return (NT_STATUS_INVALID_PARAMETER); 439 if (args.in_dstoff > INT64_MAX || 440 (args.in_dstoff + args.in_xlen) < args.in_dstoff) 441 return (NT_STATUS_INVALID_PARAMETER); 442 443 /* 444 * If CopyLength == 0, "return immediately success". 445 */ 446 if (args.in_xlen == 0) { 447 status = 0; 448 goto done; 449 } 450 451 /* 452 * [MS-FSA] (summarizing) 453 * If not data stream, or if sparse, encrypted, compressed... 454 * return STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED. 455 * 456 * We'll ignore most of those except to require: 457 * Plain file, not a stream. 458 */ 459 if (!smb_node_is_file(ofile->f_node)) 460 return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED); 461 if (SMB_IS_STREAM(ofile->f_node)) 462 return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED); 463 464 /* 465 * [MS-FSA] If Open.Stream.IsDeleted ... 
466 */ 467 if (ofile->f_node->flags & NODE_FLAGS_DELETE_COMMITTED) 468 return (NT_STATUS_FILE_DELETED); 469 470 /* 471 * Check for lock conflicting with the write. 472 */ 473 status = smb_lock_range_access(sr, ofile->f_node, 474 args.in_dstoff, args.in_xlen, B_TRUE); 475 if (status != 0) 476 return (status); /* == FILE_LOCK_CONFLICT */ 477 478 /* 479 * Need the file size 480 */ 481 bzero(&dst_attr, sizeof (dst_attr)); 482 dst_attr.sa_mask = SMB_AT_SIZE; 483 status = smb2_ofile_getattr(sr, ofile, &dst_attr); 484 if (status != NT_STATUS_SUCCESS) 485 return (status); 486 args.wa_eof = dst_attr.sa_vattr.va_size; 487 488 /* 489 * Destination offset vs. EOF 490 */ 491 if (args.in_dstoff > args.wa_eof) 492 return (NT_STATUS_END_OF_FILE); 493 494 /* 495 * Finally, run the I/O 496 */ 497 switch (tok->tok_type) { 498 case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA: 499 status = smb2_fsctl_odx_write_zeros(sr, &args); 500 break; 501 case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1: 502 status = smb2_fsctl_odx_write_native1(sr, &args, tok); 503 break; 504 default: 505 status = NT_STATUS_INVALID_TOKEN; 506 break; 507 } 508 509 done: 510 /* 511 * Checked MaxOutputResp above, so we can ignore errors 512 * from mbc_encodef here. 513 */ 514 if (status == NT_STATUS_SUCCESS) { 515 (void) smb_mbc_encodef( 516 fsctl->out_mbc, "llq", 517 args.out_struct_size, 518 args.out_flags, 519 args.out_xlen); 520 } 521 522 return (status); 523 } 524 525 /* 526 * Handle FSCTL_OFFLOAD_WRITE with token type 527 * STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA 528 * 529 * In this handler, the "token" represents a source of zeros, 530 * limited to the range: in_dstoff to (in_dstoff + in_xlen) 531 * 532 * ODX write handlers are allowed to return any transfer amount 533 * less than or equal to the requested size. We want to limit 534 * the amount of I/O "work" we do per ODX write call. Here, 535 * we're only doing meta-data operations, so we'll allow up to 536 * up to smb2_odx_read_max (256M) per call. 
537 * 538 * The I/O "work" done by this function is to make zeros appear 539 * in the file in the range: in_dstoff, (in_dstoff + in_xlen). 540 * Rather than actually write zeros, we'll use VOP_SPACE to 541 * make "holes" in the file. If any of the range we're asked 542 * to zero out is beyond the destination EOF, we can simply 543 * extend the file length (zeros will appear). 544 * 545 * The caller has verified block alignement of: 546 * args->in_dstoff, args->in_xoff, args->in_xlen 547 */ 548 static uint32_t 549 smb2_fsctl_odx_write_zeros(smb_request_t *sr, odx_write_args_t *args) 550 { 551 smb_ofile_t *dst_ofile = sr->fid_ofile; 552 uint64_t xlen; 553 int rc; 554 555 ASSERT(args->in_xlen > 0); 556 args->out_xlen = 0; 557 558 /* 559 * Limit the I/O size. (per above) 560 */ 561 if (args->in_xlen > smb2_odx_read_max) 562 args->in_xlen = smb2_odx_read_max; 563 564 /* 565 * Handle the part below destination EOF. 566 * (in_dstoff to wa_eof). 567 */ 568 if (args->in_dstoff < args->wa_eof) { 569 xlen = args->in_xlen; 570 if ((args->in_dstoff + xlen) > args->wa_eof) { 571 xlen = args->wa_eof - args->in_dstoff; 572 ASSERT(xlen < args->in_xlen); 573 } 574 rc = smb_fsop_freesp(sr, dst_ofile->f_cr, dst_ofile, 575 args->in_dstoff, xlen); 576 if (rc != 0) { 577 /* Let client fall-back to normal copy. */ 578 return (NT_STATUS_OFFLOAD_WRITE_FILE_NOT_SUPPORTED); 579 } 580 } 581 582 /* 583 * Now the part after destination EOF, if any. 584 * Just set the file size. 
585 */ 586 if ((args->in_dstoff + args->in_xlen) > args->wa_eof) { 587 smb_attr_t attr; 588 589 bzero(&attr, sizeof (smb_attr_t)); 590 attr.sa_mask = SMB_AT_SIZE; 591 attr.sa_vattr.va_size = args->in_dstoff + args->in_xlen; 592 593 rc = smb_node_setattr(sr, dst_ofile->f_node, 594 dst_ofile->f_cr, dst_ofile, &attr); 595 if (rc != 0) { 596 return (smb_errno2status(rc)); 597 } 598 } 599 600 args->out_xlen = args->in_xlen; 601 602 return (0); 603 } 604 605 /* 606 * Handle FSCTL_OFFLOAD_WRITE with token type 607 * STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1 608 * 609 * For this handler, the token represents a valid range in the 610 * source file (tn1_off to tn1_eof). The token contains enough 611 * information for us to find the tree and file handle that the 612 * client has open on the source file for this copy. 613 * 614 * ODX write handlers are allowed to return any transfer amount 615 * less than or equal to the requested size. We want to limit 616 * the amount of I/O "work" we do per ODX write call. Here, 617 * we're actually copying from another file, so limit transfers 618 * to smb2_odx_write_max (16M) per call. 619 * 620 * Copying past un-aligned end of source file: 621 * 622 * The MS-FSA spec. is silent about copying when the file length is 623 * not block aligned. Clients normally request copying a range that's 624 * the file size rounded up to a block boundary, and expect that copy 625 * to extend the destination as long as the copy has not crossed the 626 * EOF in the source file. This means that the last block we copy 627 * will generally be a partial copy, where the first part comes from 628 * the source file, and the remainider is either zeros or truncated. 629 * 630 * Extending the destination file: 631 * 632 * With a whole file copy, we want the destination file length to 633 * match the source file length, even if it's not block aligned. 
634 * We could just never extend the destination file, but there are 635 * WPTS tests that prove that ODX write IS supposed to extend the 636 * destination file when appropriate. This is solved by having 637 * this write handler extend the destination file as long as the 638 * copy has not yet crossed EOF in the source file. After we've 639 * past the source EOF with copying, we'll zero out the remainder 640 * of the block in which the copy stopped, stopping at either the 641 * end of the block or the end of the destination file, whichever 642 * comes first. This guarantees that a future read anywhere in 643 * that range will see either data from the source file or zeros. 644 * 645 * Note that no matter which way we stopped copying, we MUST 646 * return a block-aligned transfer size in our response. 647 * The caller has verified block alignement of: 648 * args->in_dstoff, args->in_xoff, args->in_xlen 649 */ 650 static uint32_t 651 smb2_fsctl_odx_write_native1(smb_request_t *sr, 652 odx_write_args_t *args, smb_odx_token_t *tok) 653 { 654 struct tok_native1 *tn1; 655 smb_ofile_t *dst_ofile = sr->fid_ofile; 656 smb_ofile_t *src_ofile = NULL; 657 void *buffer = NULL; 658 size_t bufsize = smb2_odx_buf_size; 659 uint64_t src_offset; 660 uint32_t resid; 661 uint32_t xlen; 662 uint32_t status; 663 664 ASSERT(args->in_xlen > 0); 665 args->out_xlen = 0; 666 667 /* 668 * Limit the I/O size. (per above) 669 */ 670 if (args->in_xlen > smb2_odx_write_max) 671 args->in_xlen = smb2_odx_write_max; 672 673 /* 674 * Lookup the source ofile using the "token". 675 */ 676 tn1 = &tok->tok_u.u_tok_native1; 677 678 /* 679 * If the source ofile came from another tree, we need to 680 * get the other tree and use it for the fid lookup. 681 * Do that by temporarily changing sr->tid_tree around 682 * the call to smb_ofile_lookup_by_fid(). 
683 */ 684 if (tn1->tn1_tid != sr->smb_tid) { 685 smb_tree_t *saved_tree; 686 smb_tree_t *src_tree; 687 688 src_tree = smb_session_lookup_tree(sr->session, 689 (uint16_t)tn1->tn1_tid); 690 if (src_tree == NULL) { 691 status = NT_STATUS_INVALID_TOKEN; 692 goto out; 693 } 694 695 saved_tree = sr->tid_tree; 696 sr->tid_tree = src_tree; 697 698 src_ofile = smb_ofile_lookup_by_fid(sr, 699 (uint16_t)tn1->tn1_fid.temporal); 700 701 sr->tid_tree = saved_tree; 702 smb_tree_release(src_tree); 703 } else { 704 src_ofile = smb_ofile_lookup_by_fid(sr, 705 (uint16_t)tn1->tn1_fid.temporal); 706 } 707 708 if (src_ofile == NULL || 709 src_ofile->f_persistid != tn1->tn1_fid.persistent) { 710 status = NT_STATUS_INVALID_TOKEN; 711 goto out; 712 } 713 714 /* 715 * Make sure src_ofile is open on a regular file, and 716 * granted access includes READ_DATA. These were all 717 * validated in ODX READ, so if these checks fail it 718 * means somebody messed with the token or something. 719 */ 720 if (!smb_node_is_file(src_ofile->f_node)) { 721 status = NT_STATUS_ACCESS_DENIED; 722 goto out; 723 } 724 status = smb_ofile_access(src_ofile, src_ofile->f_cr, FILE_READ_DATA); 725 if (status != NT_STATUS_SUCCESS) 726 goto out; 727 728 /* 729 * Get a buffer used for copying, always smb2_odx_buf_size 730 * 731 * Rather than sleep for this relatively large allocation, 732 * allow the allocation to fail and return an error. 733 * The client should then fall back to normal copy. 734 */ 735 buffer = kmem_alloc(bufsize, KM_NOSLEEP_LAZY); 736 if (buffer == NULL) { 737 status = NT_STATUS_INSUFF_SERVER_RESOURCES; 738 goto out; 739 } 740 741 /* 742 * Note: in_xoff is relative to the beginning of the "token" 743 * (a range of the source file tn1_off, tn1_eof). Make sure 744 * in_xoff is within the range represented by this token. 
745 */ 746 src_offset = tn1->tn1_off + args->in_xoff; 747 if (src_offset >= tn1->tn1_eof || 748 src_offset < tn1->tn1_off) { 749 status = NT_STATUS_INVALID_PARAMETER; 750 goto out; 751 } 752 753 /* 754 * Source offset+len vs. source EOF (see top comment) 755 */ 756 xlen = (uint32_t)args->in_xlen; 757 if ((src_offset + xlen) > tn1->tn1_eof) { 758 /* 759 * Copying would pass tn1_eof. Reduce xlen. 760 */ 761 DTRACE_PROBE3(crossed__eof, smb_request_t *, sr, 762 odx_write_args_t *, args, smb_odx_token_t *, tok); 763 xlen = (uint32_t)(tn1->tn1_eof - src_offset); 764 } 765 766 /* 767 * Copy src to dst for xlen. This MAY extend the dest file. 768 * Note: xlen may be not block-aligned now. Handled below. 769 */ 770 resid = xlen; 771 status = smb2_sparse_copy(sr, src_ofile, dst_ofile, 772 src_offset, args->in_dstoff, &resid, buffer, bufsize); 773 774 /* 775 * If the result was a partial copy, round down the reported 776 * transfer size to a block boundary. If we moved any data, 777 * suppress errors on this call. If an error was suppressed, 778 * it will happen again and be returned on the next call. 779 */ 780 if (status != 0 || resid != 0) { 781 xlen -= resid; 782 xlen &= ~OFFMASK; 783 args->out_xlen = xlen; 784 /* If we moved any data, suppress errors. */ 785 if (xlen > 0) 786 status = 0; 787 goto out; 788 } 789 790 /* 791 * If the copying covered the whole in_xlen, we're done. 792 * The test is >= here just so we can guarantee < below. 793 */ 794 if (xlen >= args->in_xlen) { 795 args->out_xlen = args->in_xlen; 796 goto out; 797 } 798 799 /* 800 * Have: xlen < args->in_xlen 801 * 802 * Here we know xlen was reduced because the copy 803 * crossed the source EOF. See top comment. 804 * Set the rounded-up transfer size now, and 805 * deal with the remainder of the last block. 
806 */ 807 args->out_xlen = (xlen + OFFMASK) & ~OFFMASK; 808 809 /* 810 * If smb2_sparse_copy passed wa_eof, that means we've 811 * extended the file, so the remainder of the last block 812 * written is beyond the destination EOF was, so there's 813 * no need to zero out the remainder. "We're done". 814 */ 815 args->in_dstoff += xlen; 816 if (args->in_dstoff >= args->wa_eof) 817 goto out; 818 819 /* 820 * Have: in_dstoff < wa_eof 821 * 822 * Zero out the unwritten part of the last block that 823 * falls before the destination EOF. (Not extending.) 824 * Here, resid is the length of the part we'll zero. 825 */ 826 resid = args->out_xlen - xlen; 827 if ((args->in_dstoff + resid) > args->wa_eof) 828 resid = args->wa_eof - args->in_dstoff; 829 if (resid > 0) { 830 int rc; 831 /* 832 * Zero out in_dstoff to wa_eof. 833 */ 834 rc = smb_fsop_freesp(sr, dst_ofile->f_cr, dst_ofile, 835 args->in_dstoff, resid); 836 if (rc != 0) { 837 status = smb_errno2status(rc); 838 } 839 } 840 841 out: 842 if (src_ofile != NULL) 843 smb_ofile_release(src_ofile); 844 845 if (buffer != NULL) 846 kmem_free(buffer, bufsize); 847 848 return (status); 849 } 850 851 /* 852 * Get an smb_odx_token_t from the (input) mbuf chain. 853 * Consumes exactly TOKEN_TOTAL_SIZE bytes. 
854 */ 855 static int 856 smb_odx_get_token(mbuf_chain_t *mbc, smb_odx_token_t *tok) 857 { 858 mbuf_chain_t tok_mbc; 859 int start_pos = mbc->chain_offset; 860 int rc; 861 862 if (MBC_ROOM_FOR(mbc, TOKEN_TOTAL_SIZE) == 0) 863 return (-1); 864 865 /* 866 * No big-endian support in smb_mbc_encodef, so swap 867 * the big-endian fields: tok_type (32-bits), 868 * (reserved is 16-bit zero, so no swap), 869 * and tok_len (16-bits) 870 */ 871 rc = smb_mbc_decodef( 872 mbc, "l..w", 873 &tok->tok_type, 874 /* tok_reserved */ 875 &tok->tok_len); 876 if (rc != 0) 877 return (rc); 878 tok->tok_type = BSWAP_32(tok->tok_type); 879 tok->tok_len = BSWAP_16(tok->tok_len); 880 881 if (tok->tok_len > TOKEN_MAX_PAYLOAD) 882 return (-1); 883 rc = MBC_SHADOW_CHAIN(&tok_mbc, mbc, 884 mbc->chain_offset, tok->tok_len); 885 if (rc != 0) 886 return (rc); 887 888 switch (tok->tok_type) { 889 case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA: 890 /* no payload */ 891 break; 892 case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1: 893 rc = smb_odx_get_token_native1(&tok_mbc, 894 &tok->tok_u.u_tok_native1); 895 break; 896 default: 897 /* caller will error out */ 898 break; 899 } 900 901 if (rc == 0) { 902 /* Advance past what we shadowed. */ 903 mbc->chain_offset = start_pos + TOKEN_TOTAL_SIZE; 904 } 905 906 return (rc); 907 } 908 909 static int 910 smb_odx_get_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1) 911 { 912 int rc; 913 914 rc = smb_mbc_decodef( 915 mbc, "qqqql", 916 &tn1->tn1_fid.persistent, 917 &tn1->tn1_fid.temporal, 918 &tn1->tn1_off, 919 &tn1->tn1_eof, 920 &tn1->tn1_tid); 921 922 return (rc); 923 } 924 925 /* 926 * Put an smb_odx_token_t into the (output) mbuf chain, 927 * padded to TOKEN_TOTAL_SIZE bytes. 
928 */ 929 static int 930 smb_odx_put_token(mbuf_chain_t *mbc, smb_odx_token_t *tok) 931 { 932 int rc, padlen; 933 int start_pos = mbc->chain_offset; 934 int end_pos = start_pos + TOKEN_TOTAL_SIZE; 935 936 if (tok->tok_len > TOKEN_MAX_PAYLOAD) 937 return (-1); 938 939 /* 940 * No big-endian support in smb_mbc_encodef, so swap 941 * the big-endian fields: tok_type (32-bits), 942 * (reserved is 16-bit zero, so no swap), 943 * and tok_len (16-bits) 944 */ 945 rc = smb_mbc_encodef( 946 mbc, "lww", 947 BSWAP_32(tok->tok_type), 948 0, /* tok_reserved */ 949 BSWAP_16(tok->tok_len)); 950 if (rc != 0) 951 return (rc); 952 953 switch (tok->tok_type) { 954 case STORAGE_OFFLOAD_TOKEN_TYPE_ZERO_DATA: 955 /* no payload */ 956 break; 957 case STORAGE_OFFLOAD_TOKEN_TYPE_NATIVE1: 958 rc = smb_odx_put_token_native1(mbc, 959 &tok->tok_u.u_tok_native1); 960 break; 961 default: 962 ASSERT(0); 963 return (-1); 964 } 965 966 /* Pad out to TOKEN_TOTAL_SIZE bytes. */ 967 if (mbc->chain_offset < end_pos) { 968 padlen = end_pos - mbc->chain_offset; 969 (void) smb_mbc_encodef(mbc, "#.", padlen); 970 } 971 ASSERT(mbc->chain_offset == end_pos); 972 973 return (rc); 974 } 975 976 static int 977 smb_odx_put_token_native1(mbuf_chain_t *mbc, struct tok_native1 *tn1) 978 { 979 int rc; 980 981 rc = smb_mbc_encodef( 982 mbc, "qqqql", 983 tn1->tn1_fid.persistent, 984 tn1->tn1_fid.temporal, 985 tn1->tn1_off, 986 tn1->tn1_eof, 987 tn1->tn1_tid); 988 989 return (rc); 990 } 991