1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2015-2021 Tintri by DDN, Inc. All rights reserved.
14 */
15
16 /*
17 * Dispatch function for SMB2_READ
18 * MS-SMB2 sec. 3.3.5.12
19 */
20
21 #include <smbsrv/smb2_kproto.h>
22 #include <smbsrv/smb_fsops.h>
23
24 extern boolean_t smb_allow_unbuffered;
25
26 int smb2_read_zcopy = 1;
27
28 /*
29 * Copy Reduction support.
30 * xuio_t wrapper with additional private data.
31 */
32 typedef struct smb_xuio {
33 xuio_t su_xuio; // keep first!
34 smb_node_t *su_node;
35 uint_t su_ref;
36 } smb_xuio_t;
37
38 /*
39 * Allocate an smb_xuio_t object. This survives long enough
40 * to keep track of buffers loaned to us from the VFS layer.
41 * We'll construct mbufs with "external" buffers setup to
42 * point to the loaned VFS buffers, incrementing the su_ref
43 * count for each. Each such message when free'd will call
44 * the smb_xuio_free function below.
45 */
46 smb_xuio_t *
smb_xuio_alloc(smb_node_t * node)47 smb_xuio_alloc(smb_node_t *node)
48 {
49 smb_xuio_t *su;
50
51 su = kmem_zalloc(sizeof (*su), KM_SLEEP);
52 su->su_node = node;
53 smb_node_ref(node);
54
55 /*
56 * Initial ref count set to 1, later incremented
57 * for the mbufs that refer to borrowed buffers
58 * owned by this xuio. See smb_xuio_to_mbuf().
59 */
60 su->su_ref = 1;
61 su->su_xuio.xu_type = UIOTYPE_ZEROCOPY;
62
63 return (su);
64 }
65
66 /*
67 * Callback function to return the loaned buffers.
68 * Calls VOP_RETZCBUF() only after all messages with
69 * references to this xuio are free'd.
70 */
71 void
smb_xuio_free(void * varg)72 smb_xuio_free(void *varg)
73 {
74 uint_t ref;
75 smb_xuio_t *su = (smb_xuio_t *)varg;
76 xuio_t *xu = &su->su_xuio;
77
78 ref = atomic_dec_uint_nv(&su->su_ref);
79 if (ref != 0)
80 return;
81
82 /* The XUIO flag is set by VOP_REQZCBUF */
83 if (xu->xu_uio.uio_extflg & UIO_XUIO) {
84 (void) smb_fsop_retzcbuf(su->su_node, xu, CRED());
85 }
86
87 smb_node_release(su->su_node);
88 kmem_free(su, sizeof (*su));
89 }
90
91 /*
92 * Wrapper for smb_mbuf_alloc_ext free function because the
93 * free function is passed a pointer to the mbuf, not arg1.
94 */
95 static void
smb_xuio_mbuf_free(mbuf_t * m)96 smb_xuio_mbuf_free(mbuf_t *m)
97 {
98 ASSERT((m->m_flags & M_EXT) != 0);
99 smb_xuio_free(m->m_ext.ext_arg1);
100 /* caller clears m_ext.ext_buf */
101 }
102
103 /*
104 * Build list of mbufs pointing to the loaned xuio buffers.
105 * Note these are not visible yet to other threads, so
106 * not using atomics to adjust su_ref.
107 */
108 static mbuf_t *
smb_xuio_to_mbuf(smb_xuio_t * su)109 smb_xuio_to_mbuf(smb_xuio_t *su)
110 {
111 uio_t *uiop;
112 struct iovec *iovp;
113 mbuf_t *mp, *mp1;
114 int i;
115
116 uiop = &su->su_xuio.xu_uio;
117 if (uiop->uio_iovcnt == 0)
118 return (NULL);
119
120 iovp = uiop->uio_iov;
121
122 mp = smb_mbuf_alloc_ext(iovp->iov_base, iovp->iov_len,
123 smb_xuio_mbuf_free, su);
124 ASSERT(mp != NULL);
125 su->su_ref++;
126
127 mp1 = mp;
128 for (i = 1; i < uiop->uio_iovcnt; i++) {
129 iovp = (uiop->uio_iov + i);
130
131 mp1->m_next = smb_mbuf_alloc_ext(iovp->iov_base,
132 iovp->iov_len, smb_xuio_mbuf_free, su);
133
134 mp1 = mp1->m_next;
135 ASSERT(mp1 != NULL);
136 su->su_ref++;
137 }
138
139 return (mp);
140 }
141
/*
 * Dispatch handler for SMB2_READ (MS-SMB2 3.3.5.12).
 *
 * Decodes the request, reads from disk-tree files (optionally via
 * the zero-copy xuio path) or from named pipes, attaches the data
 * to sr->raw_data, and encodes the SMB2 READ response.  On any
 * error a status-only error response is produced instead.
 */
smb_sdrc_t
smb2_read(smb_request_t *sr)
{
	smb_rw_param_t *param = NULL;
	smb_ofile_t *of = NULL;
	smb_vdb_t *vdb = NULL;
	struct mbuf *m = NULL;
	smb_xuio_t *su = NULL;
	uio_t *uio = NULL;
	uint16_t StructSize;
	uint8_t Padding;
	uint8_t Flags;
	uint8_t DataOff;
	uint32_t Length;
	uint64_t Offset;
	smb2fid_t smb2fid;
	uint32_t MinCount;
	uint32_t Channel;
	uint32_t Remaining;
	uint16_t ChanInfoOffset;
	uint16_t ChanInfoLength;
	uint32_t XferCount = 0;
	uint32_t status;
	int rc = 0;
	int ioflag = 0;
	boolean_t unbuffered = B_FALSE;
	boolean_t zcopy = B_FALSE;

	/*
	 * SMB2 Read request
	 */
	rc = smb_mbc_decodef(
	    &sr->smb_data,
	    "wbblqqqlllww",
	    &StructSize,		/* w */
	    &Padding,			/* b */
	    &Flags,			/* b */
	    &Length,			/* l */
	    &Offset,			/* q */
	    &smb2fid.persistent,	/* q */
	    &smb2fid.temporal,		/* q */
	    &MinCount,			/* l */
	    &Channel,			/* l */
	    &Remaining,			/* l */
	    &ChanInfoOffset,		/* w */
	    &ChanInfoLength);		/* w */
	if (rc)
		return (SDRC_ERROR);
	/* MS-SMB2 mandates StructureSize 49 for READ requests. */
	if (StructSize != 49)
		return (SDRC_ERROR);

	/*
	 * Setup an smb_rw_param_t which contains the VDB we need.
	 * This is automatically free'd.
	 */
	param = smb_srm_zalloc(sr, sizeof (*param));
	param->rw_offset = Offset;
	param->rw_count = Length;
	/* Note that the dtrace provider uses sr->arg.rw */
	sr->arg.rw = param;

	/*
	 * Want FID lookup before the start probe.
	 */
	status = smb2sr_lookup_fid(sr, &smb2fid);
	of = sr->fid_ofile;

	DTRACE_SMB2_START(op__Read, smb_request_t *, sr); /* arg.rw */

	if (status != 0)
		goto done; /* Bad FID */

	/*
	 * Short-circuit zero-byte read, otherwise could panic
	 * setting up buffers in smb_mbuf_allocate etc.
	 */
	if (Length == 0)
		goto done;

	if (Length > smb2_max_rwsize) {
		status = NT_STATUS_INVALID_PARAMETER;
		goto done;
	}
	/* Clamp MinCount so the EOF check below can't over-demand. */
	if (MinCount > Length)
		MinCount = Length;

	/* Prepare the non-zcopy uio over vdb_iovec for the read. */
	vdb = &param->rw_vdb;
	vdb->vdb_tag = 0;
	vdb->vdb_uio.uio_iov = &vdb->vdb_iovec[0];
	vdb->vdb_uio.uio_iovcnt = MAX_IOVEC;
	vdb->vdb_uio.uio_resid = Length;
	vdb->vdb_uio.uio_loffset = (offset_t)Offset;
	vdb->vdb_uio.uio_segflg = UIO_SYSSPACE;
	vdb->vdb_uio.uio_extflg = UIO_COPY_DEFAULT;

	/*
	 * Unbuffered refers to the MS-FSA Read argument by the same name.
	 * It indicates that the cache for this range should be flushed to disk,
	 * and data read directly from disk, bypassing the cache.
	 * We don't allow that degree of cache management.
	 * Translate this directly as FRSYNC,
	 * which should at least flush the cache first.
	 */

	if (smb_allow_unbuffered &&
	    (Flags & SMB2_READFLAG_READ_UNBUFFERED) != 0) {
		unbuffered = B_TRUE;
		ioflag = FRSYNC;
	}

	switch (of->f_tree->t_res_type & STYPE_MASK) {
	case STYPE_DISKTREE:
		if (smb_node_is_dir(of->f_node)) {
			rc = EISDIR;
			break;
		}
		/* Check for conflicting locks. */
		rc = smb_lock_range_access(sr, of->f_node,
		    Offset, Length, B_FALSE);
		if (rc) {
			rc = ERANGE;
			break;
		}

		/* Try zero-copy first (tunable smb2_read_zcopy). */
		zcopy = (smb2_read_zcopy != 0);
		if (zcopy) {
			su = smb_xuio_alloc(of->f_node);
			uio = &su->su_xuio.xu_uio;
			uio->uio_segflg = UIO_SYSSPACE;
			uio->uio_loffset = (offset_t)Offset;
			uio->uio_resid = Length;

			/* Ask the FS to loan us buffers for this range. */
			rc = smb_fsop_reqzcbuf(of->f_node, &su->su_xuio,
			    UIO_READ, of->f_cr);
			if (rc == 0) {
				ASSERT((uio->uio_extflg & UIO_XUIO) != 0);
			} else {
				/* FS declined; fall back to copy path. */
				ASSERT((uio->uio_extflg & UIO_XUIO) == 0);
				smb_xuio_free(su);
				su = NULL;
				uio = NULL;
				zcopy = B_FALSE;
			}
		}
		if (!zcopy) {
			/* Conventional path: allocate our own buffers. */
			sr->raw_data.max_bytes = Length;
			m = smb_mbuf_allocate(&vdb->vdb_uio);
			uio = &vdb->vdb_uio;
		}

		rc = smb_fsop_read(sr, of->f_cr, of->f_node, of, uio, ioflag);
		if (rc != 0) {
			/* Release whichever buffers we set up above. */
			if (zcopy) {
				smb_xuio_free(su);
				su = NULL;
				uio = NULL;
			}
			m_freem(m);
			m = NULL;
			break;
		}

		/* How much data we moved. */
		XferCount = Length - uio->uio_resid;

		if (zcopy) {
			/*
			 * Build mblk chain of messages pointing to
			 * the loaned buffers in su->su_xuio
			 * Done with su (and uio) after this.
			 * NB: uio points into su->su_xuio
			 */
			ASSERT(m == NULL);
			m = smb_xuio_to_mbuf(su);
			smb_xuio_free(su);
			su = NULL;
			uio = NULL;
		}

		/* Trim chain to actual bytes read and hand it to raw_data. */
		sr->raw_data.max_bytes = XferCount;
		smb_mbuf_trim(m, XferCount);
		MBC_ATTACH_MBUF(&sr->raw_data, m);

		break;

	case STYPE_IPC:
		/* Unbuffered I/O is meaningless on a pipe. */
		if (unbuffered) {
			rc = EINVAL;
			break;
		}
		sr->raw_data.max_bytes = Length;
		m = smb_mbuf_allocate(&vdb->vdb_uio);

		rc = smb_opipe_read(sr, &vdb->vdb_uio);

		/* How much data we moved. */
		XferCount = Length - vdb->vdb_uio.uio_resid;
		sr->raw_data.max_bytes = XferCount;
		smb_mbuf_trim(m, XferCount);
		MBC_ATTACH_MBUF(&sr->raw_data, m);
		break;

	default:
	case STYPE_PRINTQ:
		rc = EACCES;
		break;
	}
	status = smb_errno2status(rc);

	/*
	 * [MS-SMB2] If the read returns fewer bytes than specified by
	 * the MinimumCount field of the request, the server MUST fail
	 * the request with STATUS_END_OF_FILE
	 */
	if (status == 0 && XferCount < MinCount)
		status = NT_STATUS_END_OF_FILE;

	/*
	 * Checking the error return _after_ dealing with
	 * the returned data so that if m was allocated,
	 * it will be free'd via sr->raw_data cleanup.
	 */
done:
	sr->smb2_status = status;
	DTRACE_SMB2_DONE(op__Read, smb_request_t *, sr); /* arg.rw */
	if (status) {
		smb2sr_put_error(sr, status);
		return (SDRC_SUCCESS);
	}

	/*
	 * SMB2 Read reply
	 */
	DataOff = SMB2_HDR_SIZE + 16;
	rc = smb_mbc_encodef(
	    &sr->reply,
	    "wb.lllC",
	    17,	/* StructSize */	/* w */
	    DataOff,			/* b. */
	    XferCount,			/* l */
	    0, /* DataRemaining */	/* l */
	    0, /* reserved */		/* l */
	    &sr->raw_data);		/* C */
	if (rc) {
		sr->smb2_status = NT_STATUS_INTERNAL_ERROR;
		return (SDRC_ERROR);
	}

	/* Advance the implied seek position past what we returned. */
	mutex_enter(&of->f_mutex);
	of->f_seek_pos = Offset + XferCount;
	mutex_exit(&of->f_mutex);

	return (SDRC_SUCCESS);
}
396