1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright 2017 Omnibond Systems, L.L.C.
4 */
5
6 #include <linux/filelock.h>
7 #include "protocol.h"
8 #include "orangefs-kernel.h"
9 #include "orangefs-bufmap.h"
10
/*
 * Header of one part of directory data.
 *
 * The header is laid over the beginning of a trailer buffer received
 * from the userspace daemon; the directory entries follow it inside the
 * same buffer.
 *
 * @next: following part in the singly linked list, NULL at the tail
 * @len:  number of bytes of directory data in this part, i.e. the
 *        trailer size minus the readdir response header
 */
struct orangefs_dir_part {
	struct orangefs_dir_part *next;
	size_t len;
};
15
16 struct orangefs_dir {
17 __u64 token;
18 struct orangefs_dir_part *part;
19 loff_t end;
20 int error;
21 };
22
/*
 * A directory position holds the part number in the bits from
 * PART_SHIFT upwards and the byte offset within that part in the bits
 * below it.
 */
#define PART_SHIFT (24)
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
26
27 /*
28 * There can be up to 512 directory entries. Each entry is encoded as
29 * follows:
30 * 4 bytes: string size (n)
31 * n bytes: string
32 * 1 byte: trailing zero
33 * padding to 8 bytes
34 * 16 bytes: khandle
35 * padding to 8 bytes
36 *
37 * The trailer_buf starts with a struct orangefs_readdir_response_s
38 * which must be skipped to get to the directory data.
39 *
40 * The data which is received from the userspace daemon is termed a
41 * part and is stored in a linked list in case more than one part is
42 * needed for a large directory.
43 *
44 * The position pointer (ctx->pos) encodes the part and offset on which
45 * to begin reading at. Bits above PART_SHIFT encode the part and bits
46 * below PART_SHIFT encode the offset. Parts are stored in a linked
47 * list which grows as data is received from the server. The overhead
48 * associated with managing the list is presumed to be small compared to
49 * the overhead of communicating with the server.
50 *
51 * As data is received from the server, it is placed at the end of the
52 * part list. Data is parsed from the current position as it is needed.
53 * When data is determined to be corrupt, it is either because the
54 * userspace component has sent back corrupt data or because the file
55 * pointer has been moved to an invalid location. Since the two cannot
56 * be differentiated, return EIO.
57 *
 * Part zero is synthesized to contain `.' and `..'.  Part one is the
 * first part of the part list.
60 */
61
do_readdir(struct orangefs_dir * od,struct inode * inode,struct orangefs_kernel_op_s * op)62 static int do_readdir(struct orangefs_dir *od, struct inode *inode,
63 struct orangefs_kernel_op_s *op)
64 {
65 struct orangefs_inode_s *oi = ORANGEFS_I(inode);
66 struct orangefs_readdir_response_s *resp;
67 int bufi, r;
68
69 /*
70 * Despite the badly named field, readdir does not use shared
71 * memory. However, there are a limited number of readdir
72 * slots, which must be allocated here. This flag simply tells
73 * the op scheduler to return the op here for retry.
74 */
75 op->uses_shared_memory = 1;
76 op->upcall.req.readdir.refn = oi->refn;
77 op->upcall.req.readdir.token = od->token;
78 op->upcall.req.readdir.max_dirent_count =
79 ORANGEFS_MAX_DIRENT_COUNT_READDIR;
80
81 again:
82 bufi = orangefs_readdir_index_get();
83 if (bufi < 0) {
84 od->error = bufi;
85 return bufi;
86 }
87
88 op->upcall.req.readdir.buf_index = bufi;
89
90 r = service_operation(op, "orangefs_readdir",
91 get_interruptible_flag(inode));
92
93 orangefs_readdir_index_put(bufi);
94
95 if (op_state_purged(op)) {
96 if (r == -EAGAIN) {
97 vfree(op->downcall.trailer_buf);
98 goto again;
99 } else if (r == -EIO) {
100 vfree(op->downcall.trailer_buf);
101 od->error = r;
102 return r;
103 }
104 }
105
106 if (r < 0) {
107 vfree(op->downcall.trailer_buf);
108 od->error = r;
109 return r;
110 } else if (op->downcall.status) {
111 vfree(op->downcall.trailer_buf);
112 od->error = op->downcall.status;
113 return op->downcall.status;
114 }
115
116 /*
117 * The maximum size is size per entry times the 512 entries plus
118 * the header. This is well under the limit.
119 */
120 if (op->downcall.trailer_size > PART_SIZE) {
121 vfree(op->downcall.trailer_buf);
122 od->error = -EIO;
123 return -EIO;
124 }
125
126 resp = (struct orangefs_readdir_response_s *)
127 op->downcall.trailer_buf;
128 od->token = resp->token;
129 return 0;
130 }
131
parse_readdir(struct orangefs_dir * od,struct orangefs_kernel_op_s * op)132 static int parse_readdir(struct orangefs_dir *od,
133 struct orangefs_kernel_op_s *op)
134 {
135 struct orangefs_dir_part *part, *new;
136 size_t count;
137
138 count = 1;
139 part = od->part;
140 while (part) {
141 count++;
142 if (part->next)
143 part = part->next;
144 else
145 break;
146 }
147
148 new = (void *)op->downcall.trailer_buf;
149 new->next = NULL;
150 new->len = op->downcall.trailer_size -
151 sizeof(struct orangefs_readdir_response_s);
152 if (!od->part)
153 od->part = new;
154 else
155 part->next = new;
156 count++;
157 od->end = count << PART_SHIFT;
158
159 return 0;
160 }
161
orangefs_dir_more(struct orangefs_dir * od,struct inode * inode)162 static int orangefs_dir_more(struct orangefs_dir *od, struct inode *inode)
163 {
164 struct orangefs_kernel_op_s *op;
165 int r;
166
167 op = op_alloc(ORANGEFS_VFS_OP_READDIR);
168 if (!op) {
169 od->error = -ENOMEM;
170 return -ENOMEM;
171 }
172 r = do_readdir(od, inode, op);
173 if (r) {
174 od->error = r;
175 goto out;
176 }
177 r = parse_readdir(od, op);
178 if (r) {
179 od->error = r;
180 goto out;
181 }
182
183 od->error = 0;
184 out:
185 op_release(op);
186 return od->error;
187 }
188
fill_from_part(struct orangefs_dir_part * part,struct dir_context * ctx)189 static int fill_from_part(struct orangefs_dir_part *part,
190 struct dir_context *ctx)
191 {
192 const int offset = sizeof(struct orangefs_readdir_response_s);
193 struct orangefs_khandle *khandle;
194 __u32 *len, padlen;
195 loff_t i;
196 char *s;
197 i = ctx->pos & ~PART_MASK;
198
199 /* The file offset from userspace is too large. */
200 if (i > part->len)
201 return 1;
202
203 /*
204 * If the seek pointer is positioned just before an entry it
205 * should find the next entry.
206 */
207 if (i % 8)
208 i = i + (8 - i%8)%8;
209
210 while (i < part->len) {
211 if (part->len < i + sizeof *len)
212 break;
213 len = (void *)part + offset + i;
214 /*
215 * len is the size of the string itself. padlen is the
216 * total size of the encoded string.
217 */
218 padlen = (sizeof *len + *len + 1) +
219 (8 - (sizeof *len + *len + 1)%8)%8;
220 if (part->len < i + padlen + sizeof *khandle)
221 goto next;
222 s = (void *)part + offset + i + sizeof *len;
223 if (s[*len] != 0)
224 goto next;
225 khandle = (void *)part + offset + i + padlen;
226 if (!dir_emit(ctx, s, *len,
227 orangefs_khandle_to_ino(khandle),
228 DT_UNKNOWN))
229 return 0;
230 i += padlen + sizeof *khandle;
231 i = i + (8 - i%8)%8;
232 BUG_ON(i > part->len);
233 ctx->pos = (ctx->pos & PART_MASK) | i;
234 continue;
235 next:
236 i += 8;
237 }
238 return 1;
239 }
240
orangefs_dir_fill(struct orangefs_dir * od,struct dir_context * ctx)241 static int orangefs_dir_fill(struct orangefs_dir *od, struct dir_context *ctx)
242 {
243 struct orangefs_dir_part *part;
244 size_t count;
245
246 count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
247
248 part = od->part;
249 while (part->next && count) {
250 count--;
251 part = part->next;
252 }
253 /* This means the userspace file offset is invalid. */
254 if (count) {
255 od->error = -EIO;
256 return -EIO;
257 }
258
259 while (part && part->len) {
260 int r;
261 r = fill_from_part(part, ctx);
262 if (r < 0) {
263 od->error = r;
264 return r;
265 } else if (r == 0) {
266 /* Userspace buffer is full. */
267 break;
268 } else {
269 /*
270 * The part ran out of data. Move to the next
271 * part. */
272 ctx->pos = (ctx->pos & PART_MASK) +
273 (1 << PART_SHIFT);
274 part = part->next;
275 }
276 }
277 return 0;
278 }
279
/*
 * Seek within a directory.
 *
 * An absolute seek to a position before od->end discards every stored
 * part and restarts the readdir token, so that later reads fetch the
 * directory again and userspace sees new entries.  The position itself
 * is updated by default_llseek(), whose result is returned.
 */
static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
    int whence)
{
	struct orangefs_dir *od = file->private_data;

	if (whence == SEEK_SET && offset < od->end) {
		struct orangefs_dir_part *cur = od->part;

		while (cur) {
			struct orangefs_dir_part *doomed = cur;

			cur = cur->next;
			vfree(doomed);
		}

		od->part = NULL;
		od->end = 1 << PART_SHIFT;
		od->token = ORANGEFS_ITERATE_START;
	}

	return default_llseek(file, offset, whence);
}
301
orangefs_dir_iterate(struct file * file,struct dir_context * ctx)302 static int orangefs_dir_iterate(struct file *file,
303 struct dir_context *ctx)
304 {
305 struct orangefs_dir *od = file->private_data;
306 struct inode *inode = file_inode(file);
307 int r;
308
309 if (od->error)
310 return od->error;
311
312 if (ctx->pos == 0) {
313 if (!dir_emit_dot(file, ctx))
314 return 0;
315 ctx->pos++;
316 }
317 if (ctx->pos == 1) {
318 if (!dir_emit_dotdot(file, ctx))
319 return 0;
320 ctx->pos = 1 << PART_SHIFT;
321 }
322
323 /*
324 * The seek position is in the first synthesized part but is not
325 * valid.
326 */
327 if ((ctx->pos & PART_MASK) == 0)
328 return -EIO;
329
330 r = 0;
331
332 /*
333 * Must read more if the user has sought past what has been read
334 * so far. Stop a user who has sought past the end.
335 */
336 while (od->token != ORANGEFS_ITERATE_END &&
337 ctx->pos > od->end) {
338 r = orangefs_dir_more(od, inode);
339 if (r)
340 return r;
341 }
342 if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
343 return -EIO;
344
345 /* Then try to fill if there's any left in the buffer. */
346 if (ctx->pos < od->end) {
347 r = orangefs_dir_fill(od, ctx);
348 if (r)
349 return r;
350 }
351
352 /* Finally get some more and try to fill. */
353 if (od->token != ORANGEFS_ITERATE_END) {
354 r = orangefs_dir_more(od, inode);
355 if (r)
356 return r;
357 r = orangefs_dir_fill(od, ctx);
358 }
359
360 return r;
361 }
362
orangefs_dir_open(struct inode * inode,struct file * file)363 static int orangefs_dir_open(struct inode *inode, struct file *file)
364 {
365 struct orangefs_dir *od;
366 file->private_data = kmalloc_obj(struct orangefs_dir);
367 if (!file->private_data)
368 return -ENOMEM;
369 od = file->private_data;
370 od->token = ORANGEFS_ITERATE_START;
371 od->part = NULL;
372 od->end = 1 << PART_SHIFT;
373 od->error = 0;
374 return 0;
375 }
376
orangefs_dir_release(struct inode * inode,struct file * file)377 static int orangefs_dir_release(struct inode *inode, struct file *file)
378 {
379 struct orangefs_dir *od = file->private_data;
380 struct orangefs_dir_part *part = od->part;
381 while (part) {
382 struct orangefs_dir_part *next = part->next;
383 vfree(part);
384 part = next;
385 }
386 kfree(od);
387 return 0;
388 }
389
390 const struct file_operations orangefs_dir_operations = {
391 .llseek = orangefs_dir_llseek,
392 .read = generic_read_dir,
393 .iterate_shared = orangefs_dir_iterate,
394 .open = orangefs_dir_open,
395 .release = orangefs_dir_release,
396 .setlease = generic_setlease,
397 };
398