xref: /linux/fs/orangefs/dir.c (revision 136b43aa4b16563c8010c90c53303b745340d70f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright 2017 Omnibond Systems, L.L.C.
4  */
5 
6 #include <linux/filelock.h>
7 #include "protocol.h"
8 #include "orangefs-kernel.h"
9 #include "orangefs-bufmap.h"
10 
/*
 * Header of one "part" of directory data.  parse_readdir() casts the
 * start of a readdir trailer buffer to this type, so these fields
 * overlay the beginning of that buffer; the encoded directory entries
 * are read from the same buffer at an offset of
 * sizeof(struct orangefs_readdir_response_s).
 */
struct orangefs_dir_part {
	/* Next part in the singly linked list, or NULL for the last part. */
	struct orangefs_dir_part *next;
	/* Trailer size minus the size of the readdir response header. */
	size_t len;
};
15 
/*
 * Per-open-file directory state, stored in file->private_data by
 * orangefs_dir_open() and freed by orangefs_dir_release().
 */
struct orangefs_dir {
	/*
	 * Token sent with the next readdir upcall and replaced by the
	 * token found in each response; ORANGEFS_ITERATE_START initially.
	 */
	__u64 token;
	/* Head of the list of parts received so far, or NULL. */
	struct orangefs_dir_part *part;
	/* Position just past the last received part (part number << PART_SHIFT). */
	loff_t end;
	/* Last error recorded; orangefs_dir_iterate() returns it while non-zero. */
	int error;
};
22 
/*
 * A directory position is split in two: the bits at and above
 * PART_SHIFT select the part, the bits below it are the byte offset
 * inside that part.  PART_SIZE is therefore also the largest trailer
 * accepted for a single part, and PART_MASK extracts the part bits.
 */
#define PART_SHIFT (24)
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
26 
27 /*
28  * There can be up to 512 directory entries.  Each entry is encoded as
29  * follows:
30  * 4 bytes: string size (n)
31  * n bytes: string
32  * 1 byte: trailing zero
33  * padding to 8 bytes
34  * 16 bytes: khandle
35  * padding to 8 bytes
36  *
37  * The trailer_buf starts with a struct orangefs_readdir_response_s
38  * which must be skipped to get to the directory data.
39  *
40  * The data which is received from the userspace daemon is termed a
41  * part and is stored in a linked list in case more than one part is
42  * needed for a large directory.
43  *
44  * The position pointer (ctx->pos) encodes the part and offset on which
45  * to begin reading at.  Bits above PART_SHIFT encode the part and bits
46  * below PART_SHIFT encode the offset.  Parts are stored in a linked
47  * list which grows as data is received from the server.  The overhead
48  * associated with managing the list is presumed to be small compared to
49  * the overhead of communicating with the server.
50  *
51  * As data is received from the server, it is placed at the end of the
52  * part list.  Data is parsed from the current position as it is needed.
53  * When data is determined to be corrupt, it is either because the
54  * userspace component has sent back corrupt data or because the file
55  * pointer has been moved to an invalid location.  Since the two cannot
56  * be differentiated, return EIO.
57  *
 * Part zero is synthesized to contain `.' and `..'.  Part one is the
 * first part of the part list.
60  */
61 
62 static int do_readdir(struct orangefs_dir *od, struct inode *inode,
63     struct orangefs_kernel_op_s *op)
64 {
65 	struct orangefs_inode_s *oi = ORANGEFS_I(inode);
66 	struct orangefs_readdir_response_s *resp;
67 	int bufi, r;
68 
69 	/*
70 	 * Despite the badly named field, readdir does not use shared
71 	 * memory.  However, there are a limited number of readdir
72 	 * slots, which must be allocated here.  This flag simply tells
73 	 * the op scheduler to return the op here for retry.
74 	 */
75 	op->uses_shared_memory = 1;
76 	op->upcall.req.readdir.refn = oi->refn;
77 	op->upcall.req.readdir.token = od->token;
78 	op->upcall.req.readdir.max_dirent_count =
79 	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
80 
81 again:
82 	bufi = orangefs_readdir_index_get();
83 	if (bufi < 0) {
84 		od->error = bufi;
85 		return bufi;
86 	}
87 
88 	op->upcall.req.readdir.buf_index = bufi;
89 
90 	r = service_operation(op, "orangefs_readdir",
91 	    get_interruptible_flag(inode));
92 
93 	orangefs_readdir_index_put(bufi);
94 
95 	if (op_state_purged(op)) {
96 		if (r == -EAGAIN) {
97 			vfree(op->downcall.trailer_buf);
98 			goto again;
99 		} else if (r == -EIO) {
100 			vfree(op->downcall.trailer_buf);
101 			od->error = r;
102 			return r;
103 		}
104 	}
105 
106 	if (r < 0) {
107 		vfree(op->downcall.trailer_buf);
108 		od->error = r;
109 		return r;
110 	} else if (op->downcall.status) {
111 		vfree(op->downcall.trailer_buf);
112 		od->error = op->downcall.status;
113 		return op->downcall.status;
114 	}
115 
116 	/*
117 	 * The maximum size is size per entry times the 512 entries plus
118 	 * the header.  This is well under the limit.
119 	 */
120 	if (op->downcall.trailer_size > PART_SIZE) {
121 		vfree(op->downcall.trailer_buf);
122 		od->error = -EIO;
123 		return -EIO;
124 	}
125 
126 	resp = (struct orangefs_readdir_response_s *)
127 	    op->downcall.trailer_buf;
128 	od->token = resp->token;
129 	return 0;
130 }
131 
132 static int parse_readdir(struct orangefs_dir *od,
133     struct orangefs_kernel_op_s *op)
134 {
135 	struct orangefs_dir_part *part, *new;
136 	size_t count;
137 
138 	count = 1;
139 	part = od->part;
140 	while (part) {
141 		count++;
142 		if (part->next)
143 			part = part->next;
144 		else
145 			break;
146 	}
147 
148 	new = (void *)op->downcall.trailer_buf;
149 	new->next = NULL;
150 	new->len = op->downcall.trailer_size -
151 	    sizeof(struct orangefs_readdir_response_s);
152 	if (!od->part)
153 		od->part = new;
154 	else
155 		part->next = new;
156 	count++;
157 	od->end = count << PART_SHIFT;
158 
159 	return 0;
160 }
161 
162 static int orangefs_dir_more(struct orangefs_dir *od, struct inode *inode)
163 {
164 	struct orangefs_kernel_op_s *op;
165 	int r;
166 
167 	op = op_alloc(ORANGEFS_VFS_OP_READDIR);
168 	if (!op) {
169 		od->error = -ENOMEM;
170 		return -ENOMEM;
171 	}
172 	r = do_readdir(od, inode, op);
173 	if (r) {
174 		od->error = r;
175 		goto out;
176 	}
177 	r = parse_readdir(od, op);
178 	if (r) {
179 		od->error = r;
180 		goto out;
181 	}
182 
183 	od->error = 0;
184 out:
185 	op_release(op);
186 	return od->error;
187 }
188 
189 static int fill_from_part(struct orangefs_dir_part *part,
190     struct dir_context *ctx)
191 {
192 	const int offset = sizeof(struct orangefs_readdir_response_s);
193 	struct orangefs_khandle *khandle;
194 	__u32 *len, padlen;
195 	loff_t i;
196 	char *s;
197 	i = ctx->pos & ~PART_MASK;
198 
199 	/* The file offset from userspace is too large. */
200 	if (i > part->len)
201 		return 1;
202 
203 	/*
204 	 * If the seek pointer is positioned just before an entry it
205 	 * should find the next entry.
206 	 */
207 	if (i % 8)
208 		i = i + (8 - i%8)%8;
209 
210 	while (i < part->len) {
211 		if (part->len < i + sizeof *len)
212 			break;
213 		len = (void *)part + offset + i;
214 		/*
215 		 * len is the size of the string itself.  padlen is the
216 		 * total size of the encoded string.
217 		 */
218 		padlen = (sizeof *len + *len + 1) +
219 		    (8 - (sizeof *len + *len + 1)%8)%8;
220 		if (part->len < i + padlen + sizeof *khandle)
221 			goto next;
222 		s = (void *)part + offset + i + sizeof *len;
223 		if (s[*len] != 0)
224 			goto next;
225 		khandle = (void *)part + offset + i + padlen;
226 		if (!dir_emit(ctx, s, *len,
227 		    orangefs_khandle_to_ino(khandle),
228 		    DT_UNKNOWN))
229 			return 0;
230 		i += padlen + sizeof *khandle;
231 		i = i + (8 - i%8)%8;
232 		BUG_ON(i > part->len);
233 		ctx->pos = (ctx->pos & PART_MASK) | i;
234 		continue;
235 next:
236 		i += 8;
237 	}
238 	return 1;
239 }
240 
241 static int orangefs_dir_fill(struct orangefs_dir *od, struct dir_context *ctx)
242 {
243 	struct orangefs_dir_part *part;
244 	size_t count;
245 
246 	count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
247 
248 	part = od->part;
249 	while (part->next && count) {
250 		count--;
251 		part = part->next;
252 	}
253 	/* This means the userspace file offset is invalid. */
254 	if (count) {
255 		od->error = -EIO;
256 		return -EIO;
257 	}
258 
259 	while (part && part->len) {
260 		int r;
261 		r = fill_from_part(part, ctx);
262 		if (r < 0) {
263 			od->error = r;
264 			return r;
265 		} else if (r == 0) {
266 			/* Userspace buffer is full. */
267 			break;
268 		} else {
269 			/*
270 			 * The part ran out of data.  Move to the next
271 			 * part. */
272 			ctx->pos = (ctx->pos & PART_MASK) +
273 			    (1 << PART_SHIFT);
274 			part = part->next;
275 		}
276 	}
277 	return 0;
278 }
279 
280 static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
281     int whence)
282 {
283 	struct orangefs_dir *od = file->private_data;
284 	/*
285 	 * Delete the stored data so userspace sees new directory
286 	 * entries.
287 	 */
288 	if (!whence && offset < od->end) {
289 		struct orangefs_dir_part *part = od->part;
290 		while (part) {
291 			struct orangefs_dir_part *next = part->next;
292 			vfree(part);
293 			part = next;
294 		}
295 		od->token = ORANGEFS_ITERATE_START;
296 		od->part = NULL;
297 		od->end = 1 << PART_SHIFT;
298 	}
299 	return default_llseek(file, offset, whence);
300 }
301 
302 static int orangefs_dir_iterate(struct file *file,
303     struct dir_context *ctx)
304 {
305 	struct orangefs_dir *od = file->private_data;
306 	struct inode *inode = file_inode(file);
307 	int r;
308 
309 	if (od->error)
310 		return od->error;
311 
312 	if (ctx->pos == 0) {
313 		if (!dir_emit_dot(file, ctx))
314 			return 0;
315 		ctx->pos++;
316 	}
317 	if (ctx->pos == 1) {
318 		if (!dir_emit_dotdot(file, ctx))
319 			return 0;
320 		ctx->pos = 1 << PART_SHIFT;
321 	}
322 
323 	/*
324 	 * The seek position is in the first synthesized part but is not
325 	 * valid.
326 	 */
327 	if ((ctx->pos & PART_MASK) == 0)
328 		return -EIO;
329 
330 	r = 0;
331 
332 	/*
333 	 * Must read more if the user has sought past what has been read
334 	 * so far.  Stop a user who has sought past the end.
335 	 */
336 	while (od->token != ORANGEFS_ITERATE_END &&
337 	    ctx->pos > od->end) {
338 		r = orangefs_dir_more(od, inode);
339 		if (r)
340 			return r;
341 	}
342 	if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
343 		return -EIO;
344 
345 	/* Then try to fill if there's any left in the buffer. */
346 	if (ctx->pos < od->end) {
347 		r = orangefs_dir_fill(od, ctx);
348 		if (r)
349 			return r;
350 	}
351 
352 	/* Finally get some more and try to fill. */
353 	if (od->token != ORANGEFS_ITERATE_END) {
354 		r = orangefs_dir_more(od, inode);
355 		if (r)
356 			return r;
357 		r = orangefs_dir_fill(od, ctx);
358 	}
359 
360 	return r;
361 }
362 
363 static int orangefs_dir_open(struct inode *inode, struct file *file)
364 {
365 	struct orangefs_dir *od;
366 	file->private_data = kmalloc(sizeof(struct orangefs_dir),
367 	    GFP_KERNEL);
368 	if (!file->private_data)
369 		return -ENOMEM;
370 	od = file->private_data;
371 	od->token = ORANGEFS_ITERATE_START;
372 	od->part = NULL;
373 	od->end = 1 << PART_SHIFT;
374 	od->error = 0;
375 	return 0;
376 }
377 
378 static int orangefs_dir_release(struct inode *inode, struct file *file)
379 {
380 	struct orangefs_dir *od = file->private_data;
381 	struct orangefs_dir_part *part = od->part;
382 	while (part) {
383 		struct orangefs_dir_part *next = part->next;
384 		vfree(part);
385 		part = next;
386 	}
387 	kfree(od);
388 	return 0;
389 }
390 
391 const struct file_operations orangefs_dir_operations = {
392 	.llseek = orangefs_dir_llseek,
393 	.read = generic_read_dir,
394 	.iterate_shared = orangefs_dir_iterate,
395 	.open = orangefs_dir_open,
396 	.release = orangefs_dir_release,
397 	.setlease = generic_setlease,
398 };
399