xref: /linux/fs/orangefs/dir.c (revision bf80eef2212a1e8451df13b52533f4bc31bb4f8e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright 2017 Omnibond Systems, L.L.C.
4  */
5 
6 #include "protocol.h"
7 #include "orangefs-kernel.h"
8 #include "orangefs-bufmap.h"
9 
/*
 * Header of one chunk ("part") of directory data.
 *
 * parse_readdir() writes this header over the start of a readdir
 * trailer buffer and links the buffer into the per-open-file list, so
 * a part and its entry data are a single allocation.
 */
struct orangefs_dir_part {
	/* Next part in the list, or NULL for the last one. */
	struct orangefs_dir_part *next;
	/* Bytes of entry data that follow the readdir response header. */
	size_t len;
};
14 
15 struct orangefs_dir {
16 	__u64 token;
17 	struct orangefs_dir_part *part;
18 	loff_t end;
19 	int error;
20 };
21 
/*
 * Layout of a directory position: the bits at and above PART_SHIFT
 * select the part, the bits below it are the byte offset in that part.
 */
#define PART_SHIFT (24)
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
25 
26 /*
27  * There can be up to 512 directory entries.  Each entry is encoded as
28  * follows:
29  * 4 bytes: string size (n)
30  * n bytes: string
31  * 1 byte: trailing zero
32  * padding to 8 bytes
33  * 16 bytes: khandle
34  * padding to 8 bytes
35  *
36  * The trailer_buf starts with a struct orangefs_readdir_response_s
37  * which must be skipped to get to the directory data.
38  *
39  * The data which is received from the userspace daemon is termed a
40  * part and is stored in a linked list in case more than one part is
41  * needed for a large directory.
42  *
43  * The position pointer (ctx->pos) encodes the part and offset at which
44  * to begin reading.  Bits above PART_SHIFT encode the part and bits
45  * below PART_SHIFT encode the offset.  Parts are stored in a linked
46  * list which grows as data is received from the server.  The overhead
47  * associated with managing the list is presumed to be small compared to
48  * the overhead of communicating with the server.
49  *
50  * As data is received from the server, it is placed at the end of the
51  * part list.  Data is parsed from the current position as it is needed.
52  * When data is determined to be corrupt, it is either because the
53  * userspace component has sent back corrupt data or because the file
54  * pointer has been moved to an invalid location.  Since the two cannot
55  * be differentiated, return EIO.
56  *
57  * Part zero is synthesized to contain `.' and `..'.  Part one is the
58  * first part of the part list.
59  */
60 
61 static int do_readdir(struct orangefs_inode_s *oi,
62     struct orangefs_dir *od, struct dentry *dentry,
63     struct orangefs_kernel_op_s *op)
64 {
65 	struct orangefs_readdir_response_s *resp;
66 	int bufi, r;
67 
68 	/*
69 	 * Despite the badly named field, readdir does not use shared
70 	 * memory.  However, there are a limited number of readdir
71 	 * slots, which must be allocated here.  This flag simply tells
72 	 * the op scheduler to return the op here for retry.
73 	 */
74 	op->uses_shared_memory = 1;
75 	op->upcall.req.readdir.refn = oi->refn;
76 	op->upcall.req.readdir.token = od->token;
77 	op->upcall.req.readdir.max_dirent_count =
78 	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
79 
80 again:
81 	bufi = orangefs_readdir_index_get();
82 	if (bufi < 0) {
83 		od->error = bufi;
84 		return bufi;
85 	}
86 
87 	op->upcall.req.readdir.buf_index = bufi;
88 
89 	r = service_operation(op, "orangefs_readdir",
90 	    get_interruptible_flag(dentry->d_inode));
91 
92 	orangefs_readdir_index_put(bufi);
93 
94 	if (op_state_purged(op)) {
95 		if (r == -EAGAIN) {
96 			vfree(op->downcall.trailer_buf);
97 			goto again;
98 		} else if (r == -EIO) {
99 			vfree(op->downcall.trailer_buf);
100 			od->error = r;
101 			return r;
102 		}
103 	}
104 
105 	if (r < 0) {
106 		vfree(op->downcall.trailer_buf);
107 		od->error = r;
108 		return r;
109 	} else if (op->downcall.status) {
110 		vfree(op->downcall.trailer_buf);
111 		od->error = op->downcall.status;
112 		return op->downcall.status;
113 	}
114 
115 	/*
116 	 * The maximum size is size per entry times the 512 entries plus
117 	 * the header.  This is well under the limit.
118 	 */
119 	if (op->downcall.trailer_size > PART_SIZE) {
120 		vfree(op->downcall.trailer_buf);
121 		od->error = -EIO;
122 		return -EIO;
123 	}
124 
125 	resp = (struct orangefs_readdir_response_s *)
126 	    op->downcall.trailer_buf;
127 	od->token = resp->token;
128 	return 0;
129 }
130 
131 static int parse_readdir(struct orangefs_dir *od,
132     struct orangefs_kernel_op_s *op)
133 {
134 	struct orangefs_dir_part *part, *new;
135 	size_t count;
136 
137 	count = 1;
138 	part = od->part;
139 	while (part) {
140 		count++;
141 		if (part->next)
142 			part = part->next;
143 		else
144 			break;
145 	}
146 
147 	new = (void *)op->downcall.trailer_buf;
148 	new->next = NULL;
149 	new->len = op->downcall.trailer_size -
150 	    sizeof(struct orangefs_readdir_response_s);
151 	if (!od->part)
152 		od->part = new;
153 	else
154 		part->next = new;
155 	count++;
156 	od->end = count << PART_SHIFT;
157 
158 	return 0;
159 }
160 
161 static int orangefs_dir_more(struct orangefs_inode_s *oi,
162     struct orangefs_dir *od, struct dentry *dentry)
163 {
164 	struct orangefs_kernel_op_s *op;
165 	int r;
166 
167 	op = op_alloc(ORANGEFS_VFS_OP_READDIR);
168 	if (!op) {
169 		od->error = -ENOMEM;
170 		return -ENOMEM;
171 	}
172 	r = do_readdir(oi, od, dentry, op);
173 	if (r) {
174 		od->error = r;
175 		goto out;
176 	}
177 	r = parse_readdir(od, op);
178 	if (r) {
179 		od->error = r;
180 		goto out;
181 	}
182 
183 	od->error = 0;
184 out:
185 	op_release(op);
186 	return od->error;
187 }
188 
189 static int fill_from_part(struct orangefs_dir_part *part,
190     struct dir_context *ctx)
191 {
192 	const int offset = sizeof(struct orangefs_readdir_response_s);
193 	struct orangefs_khandle *khandle;
194 	__u32 *len, padlen;
195 	loff_t i;
196 	char *s;
197 	i = ctx->pos & ~PART_MASK;
198 
199 	/* The file offset from userspace is too large. */
200 	if (i > part->len)
201 		return 1;
202 
203 	/*
204 	 * If the seek pointer is positioned just before an entry it
205 	 * should find the next entry.
206 	 */
207 	if (i % 8)
208 		i = i + (8 - i%8)%8;
209 
210 	while (i < part->len) {
211 		if (part->len < i + sizeof *len)
212 			break;
213 		len = (void *)part + offset + i;
214 		/*
215 		 * len is the size of the string itself.  padlen is the
216 		 * total size of the encoded string.
217 		 */
218 		padlen = (sizeof *len + *len + 1) +
219 		    (8 - (sizeof *len + *len + 1)%8)%8;
220 		if (part->len < i + padlen + sizeof *khandle)
221 			goto next;
222 		s = (void *)part + offset + i + sizeof *len;
223 		if (s[*len] != 0)
224 			goto next;
225 		khandle = (void *)part + offset + i + padlen;
226 		if (!dir_emit(ctx, s, *len,
227 		    orangefs_khandle_to_ino(khandle),
228 		    DT_UNKNOWN))
229 			return 0;
230 		i += padlen + sizeof *khandle;
231 		i = i + (8 - i%8)%8;
232 		BUG_ON(i > part->len);
233 		ctx->pos = (ctx->pos & PART_MASK) | i;
234 		continue;
235 next:
236 		i += 8;
237 	}
238 	return 1;
239 }
240 
241 static int orangefs_dir_fill(struct orangefs_inode_s *oi,
242     struct orangefs_dir *od, struct dentry *dentry,
243     struct dir_context *ctx)
244 {
245 	struct orangefs_dir_part *part;
246 	size_t count;
247 
248 	count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
249 
250 	part = od->part;
251 	while (part->next && count) {
252 		count--;
253 		part = part->next;
254 	}
255 	/* This means the userspace file offset is invalid. */
256 	if (count) {
257 		od->error = -EIO;
258 		return -EIO;
259 	}
260 
261 	while (part && part->len) {
262 		int r;
263 		r = fill_from_part(part, ctx);
264 		if (r < 0) {
265 			od->error = r;
266 			return r;
267 		} else if (r == 0) {
268 			/* Userspace buffer is full. */
269 			break;
270 		} else {
271 			/*
272 			 * The part ran out of data.  Move to the next
273 			 * part. */
274 			ctx->pos = (ctx->pos & PART_MASK) +
275 			    (1 << PART_SHIFT);
276 			part = part->next;
277 		}
278 	}
279 	return 0;
280 }
281 
282 static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
283     int whence)
284 {
285 	struct orangefs_dir *od = file->private_data;
286 	/*
287 	 * Delete the stored data so userspace sees new directory
288 	 * entries.
289 	 */
290 	if (!whence && offset < od->end) {
291 		struct orangefs_dir_part *part = od->part;
292 		while (part) {
293 			struct orangefs_dir_part *next = part->next;
294 			vfree(part);
295 			part = next;
296 		}
297 		od->token = ORANGEFS_ITERATE_START;
298 		od->part = NULL;
299 		od->end = 1 << PART_SHIFT;
300 	}
301 	return default_llseek(file, offset, whence);
302 }
303 
304 static int orangefs_dir_iterate(struct file *file,
305     struct dir_context *ctx)
306 {
307 	struct orangefs_inode_s *oi;
308 	struct orangefs_dir *od;
309 	struct dentry *dentry;
310 	int r;
311 
312 	dentry = file->f_path.dentry;
313 	oi = ORANGEFS_I(dentry->d_inode);
314 	od = file->private_data;
315 
316 	if (od->error)
317 		return od->error;
318 
319 	if (ctx->pos == 0) {
320 		if (!dir_emit_dot(file, ctx))
321 			return 0;
322 		ctx->pos++;
323 	}
324 	if (ctx->pos == 1) {
325 		if (!dir_emit_dotdot(file, ctx))
326 			return 0;
327 		ctx->pos = 1 << PART_SHIFT;
328 	}
329 
330 	/*
331 	 * The seek position is in the first synthesized part but is not
332 	 * valid.
333 	 */
334 	if ((ctx->pos & PART_MASK) == 0)
335 		return -EIO;
336 
337 	r = 0;
338 
339 	/*
340 	 * Must read more if the user has sought past what has been read
341 	 * so far.  Stop a user who has sought past the end.
342 	 */
343 	while (od->token != ORANGEFS_ITERATE_END &&
344 	    ctx->pos > od->end) {
345 		r = orangefs_dir_more(oi, od, dentry);
346 		if (r)
347 			return r;
348 	}
349 	if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
350 		return -EIO;
351 
352 	/* Then try to fill if there's any left in the buffer. */
353 	if (ctx->pos < od->end) {
354 		r = orangefs_dir_fill(oi, od, dentry, ctx);
355 		if (r)
356 			return r;
357 	}
358 
359 	/* Finally get some more and try to fill. */
360 	if (od->token != ORANGEFS_ITERATE_END) {
361 		r = orangefs_dir_more(oi, od, dentry);
362 		if (r)
363 			return r;
364 		r = orangefs_dir_fill(oi, od, dentry, ctx);
365 	}
366 
367 	return r;
368 }
369 
370 static int orangefs_dir_open(struct inode *inode, struct file *file)
371 {
372 	struct orangefs_dir *od;
373 	file->private_data = kmalloc(sizeof(struct orangefs_dir),
374 	    GFP_KERNEL);
375 	if (!file->private_data)
376 		return -ENOMEM;
377 	od = file->private_data;
378 	od->token = ORANGEFS_ITERATE_START;
379 	od->part = NULL;
380 	od->end = 1 << PART_SHIFT;
381 	od->error = 0;
382 	return 0;
383 }
384 
385 static int orangefs_dir_release(struct inode *inode, struct file *file)
386 {
387 	struct orangefs_dir *od = file->private_data;
388 	struct orangefs_dir_part *part = od->part;
389 	while (part) {
390 		struct orangefs_dir_part *next = part->next;
391 		vfree(part);
392 		part = next;
393 	}
394 	kfree(od);
395 	return 0;
396 }
397 
398 const struct file_operations orangefs_dir_operations = {
399 	.llseek = orangefs_dir_llseek,
400 	.read = generic_read_dir,
401 	.iterate_shared = orangefs_dir_iterate,
402 	.open = orangefs_dir_open,
403 	.release = orangefs_dir_release
404 };
405