xref: /linux/fs/orangefs/dir.c (revision 5cd2340cb6a383d04fd88e48fabc2a21a909d6a1)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright 2017 Omnibond Systems, L.L.C.
4  */
5 
6 #include "protocol.h"
7 #include "orangefs-kernel.h"
8 #include "orangefs-bufmap.h"
9 
/*
 * Header of one cached chunk ("part") of directory data.
 *
 * parse_readdir() overlays this struct on the start of the trailer
 * buffer received for a readdir operation, so it occupies the space
 * of the response header that precedes the encoded entries.
 */
struct orangefs_dir_part {
	/* Next part in the singly linked list, or NULL for the last one. */
	struct orangefs_dir_part *next;
	/*
	 * Number of bytes of encoded directory data in this part, i.e.
	 * the trailer size minus
	 * sizeof(struct orangefs_readdir_response_s).
	 */
	size_t len;
};
14 
/*
 * Per-open-file directory state, stored in file->private_data by
 * orangefs_dir_open() and freed by orangefs_dir_release().
 */
struct orangefs_dir {
	/*
	 * Continuation token sent with the next readdir request and
	 * replaced by the token found in each response.
	 */
	__u64 token;
	/* Head of the list of parts received so far; NULL when empty. */
	struct orangefs_dir_part *part;
	/*
	 * Position just past the last part received, encoded like
	 * ctx->pos (part number above PART_SHIFT).
	 */
	loff_t end;
	/*
	 * Last error recorded.  Once non-zero, orangefs_dir_iterate()
	 * returns it immediately on every call.
	 */
	int error;
};
21 
/*
 * Layout of a directory position: the bits at and above PART_SHIFT
 * select the part, the bits below it are the byte offset in the part.
 */
#define PART_SHIFT 24
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
25 
26 /*
27  * There can be up to 512 directory entries.  Each entry is encoded as
28  * follows:
29  * 4 bytes: string size (n)
30  * n bytes: string
31  * 1 byte: trailing zero
32  * padding to 8 bytes
33  * 16 bytes: khandle
34  * padding to 8 bytes
35  *
36  * The trailer_buf starts with a struct orangefs_readdir_response_s
37  * which must be skipped to get to the directory data.
38  *
39  * The data which is received from the userspace daemon is termed a
40  * part and is stored in a linked list in case more than one part is
41  * needed for a large directory.
42  *
43  * The position pointer (ctx->pos) encodes the part and offset on which
44  * to begin reading at.  Bits above PART_SHIFT encode the part and bits
45  * below PART_SHIFT encode the offset.  Parts are stored in a linked
46  * list which grows as data is received from the server.  The overhead
47  * associated with managing the list is presumed to be small compared to
48  * the overhead of communicating with the server.
49  *
50  * As data is received from the server, it is placed at the end of the
51  * part list.  Data is parsed from the current position as it is needed.
52  * When data is determined to be corrupt, it is either because the
53  * userspace component has sent back corrupt data or because the file
54  * pointer has been moved to an invalid location.  Since the two cannot
55  * be differentiated, return EIO.
56  *
 * Part zero is synthesized to contain `.' and `..'.  Part one is the
58  * first part of the part list.
59  */
60 
61 static int do_readdir(struct orangefs_dir *od, struct inode *inode,
62     struct orangefs_kernel_op_s *op)
63 {
64 	struct orangefs_inode_s *oi = ORANGEFS_I(inode);
65 	struct orangefs_readdir_response_s *resp;
66 	int bufi, r;
67 
68 	/*
69 	 * Despite the badly named field, readdir does not use shared
70 	 * memory.  However, there are a limited number of readdir
71 	 * slots, which must be allocated here.  This flag simply tells
72 	 * the op scheduler to return the op here for retry.
73 	 */
74 	op->uses_shared_memory = 1;
75 	op->upcall.req.readdir.refn = oi->refn;
76 	op->upcall.req.readdir.token = od->token;
77 	op->upcall.req.readdir.max_dirent_count =
78 	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
79 
80 again:
81 	bufi = orangefs_readdir_index_get();
82 	if (bufi < 0) {
83 		od->error = bufi;
84 		return bufi;
85 	}
86 
87 	op->upcall.req.readdir.buf_index = bufi;
88 
89 	r = service_operation(op, "orangefs_readdir",
90 	    get_interruptible_flag(inode));
91 
92 	orangefs_readdir_index_put(bufi);
93 
94 	if (op_state_purged(op)) {
95 		if (r == -EAGAIN) {
96 			vfree(op->downcall.trailer_buf);
97 			goto again;
98 		} else if (r == -EIO) {
99 			vfree(op->downcall.trailer_buf);
100 			od->error = r;
101 			return r;
102 		}
103 	}
104 
105 	if (r < 0) {
106 		vfree(op->downcall.trailer_buf);
107 		od->error = r;
108 		return r;
109 	} else if (op->downcall.status) {
110 		vfree(op->downcall.trailer_buf);
111 		od->error = op->downcall.status;
112 		return op->downcall.status;
113 	}
114 
115 	/*
116 	 * The maximum size is size per entry times the 512 entries plus
117 	 * the header.  This is well under the limit.
118 	 */
119 	if (op->downcall.trailer_size > PART_SIZE) {
120 		vfree(op->downcall.trailer_buf);
121 		od->error = -EIO;
122 		return -EIO;
123 	}
124 
125 	resp = (struct orangefs_readdir_response_s *)
126 	    op->downcall.trailer_buf;
127 	od->token = resp->token;
128 	return 0;
129 }
130 
131 static int parse_readdir(struct orangefs_dir *od,
132     struct orangefs_kernel_op_s *op)
133 {
134 	struct orangefs_dir_part *part, *new;
135 	size_t count;
136 
137 	count = 1;
138 	part = od->part;
139 	while (part) {
140 		count++;
141 		if (part->next)
142 			part = part->next;
143 		else
144 			break;
145 	}
146 
147 	new = (void *)op->downcall.trailer_buf;
148 	new->next = NULL;
149 	new->len = op->downcall.trailer_size -
150 	    sizeof(struct orangefs_readdir_response_s);
151 	if (!od->part)
152 		od->part = new;
153 	else
154 		part->next = new;
155 	count++;
156 	od->end = count << PART_SHIFT;
157 
158 	return 0;
159 }
160 
161 static int orangefs_dir_more(struct orangefs_dir *od, struct inode *inode)
162 {
163 	struct orangefs_kernel_op_s *op;
164 	int r;
165 
166 	op = op_alloc(ORANGEFS_VFS_OP_READDIR);
167 	if (!op) {
168 		od->error = -ENOMEM;
169 		return -ENOMEM;
170 	}
171 	r = do_readdir(od, inode, op);
172 	if (r) {
173 		od->error = r;
174 		goto out;
175 	}
176 	r = parse_readdir(od, op);
177 	if (r) {
178 		od->error = r;
179 		goto out;
180 	}
181 
182 	od->error = 0;
183 out:
184 	op_release(op);
185 	return od->error;
186 }
187 
188 static int fill_from_part(struct orangefs_dir_part *part,
189     struct dir_context *ctx)
190 {
191 	const int offset = sizeof(struct orangefs_readdir_response_s);
192 	struct orangefs_khandle *khandle;
193 	__u32 *len, padlen;
194 	loff_t i;
195 	char *s;
196 	i = ctx->pos & ~PART_MASK;
197 
198 	/* The file offset from userspace is too large. */
199 	if (i > part->len)
200 		return 1;
201 
202 	/*
203 	 * If the seek pointer is positioned just before an entry it
204 	 * should find the next entry.
205 	 */
206 	if (i % 8)
207 		i = i + (8 - i%8)%8;
208 
209 	while (i < part->len) {
210 		if (part->len < i + sizeof *len)
211 			break;
212 		len = (void *)part + offset + i;
213 		/*
214 		 * len is the size of the string itself.  padlen is the
215 		 * total size of the encoded string.
216 		 */
217 		padlen = (sizeof *len + *len + 1) +
218 		    (8 - (sizeof *len + *len + 1)%8)%8;
219 		if (part->len < i + padlen + sizeof *khandle)
220 			goto next;
221 		s = (void *)part + offset + i + sizeof *len;
222 		if (s[*len] != 0)
223 			goto next;
224 		khandle = (void *)part + offset + i + padlen;
225 		if (!dir_emit(ctx, s, *len,
226 		    orangefs_khandle_to_ino(khandle),
227 		    DT_UNKNOWN))
228 			return 0;
229 		i += padlen + sizeof *khandle;
230 		i = i + (8 - i%8)%8;
231 		BUG_ON(i > part->len);
232 		ctx->pos = (ctx->pos & PART_MASK) | i;
233 		continue;
234 next:
235 		i += 8;
236 	}
237 	return 1;
238 }
239 
240 static int orangefs_dir_fill(struct orangefs_dir *od, struct dir_context *ctx)
241 {
242 	struct orangefs_dir_part *part;
243 	size_t count;
244 
245 	count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
246 
247 	part = od->part;
248 	while (part->next && count) {
249 		count--;
250 		part = part->next;
251 	}
252 	/* This means the userspace file offset is invalid. */
253 	if (count) {
254 		od->error = -EIO;
255 		return -EIO;
256 	}
257 
258 	while (part && part->len) {
259 		int r;
260 		r = fill_from_part(part, ctx);
261 		if (r < 0) {
262 			od->error = r;
263 			return r;
264 		} else if (r == 0) {
265 			/* Userspace buffer is full. */
266 			break;
267 		} else {
268 			/*
269 			 * The part ran out of data.  Move to the next
270 			 * part. */
271 			ctx->pos = (ctx->pos & PART_MASK) +
272 			    (1 << PART_SHIFT);
273 			part = part->next;
274 		}
275 	}
276 	return 0;
277 }
278 
279 static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
280     int whence)
281 {
282 	struct orangefs_dir *od = file->private_data;
283 	/*
284 	 * Delete the stored data so userspace sees new directory
285 	 * entries.
286 	 */
287 	if (!whence && offset < od->end) {
288 		struct orangefs_dir_part *part = od->part;
289 		while (part) {
290 			struct orangefs_dir_part *next = part->next;
291 			vfree(part);
292 			part = next;
293 		}
294 		od->token = ORANGEFS_ITERATE_START;
295 		od->part = NULL;
296 		od->end = 1 << PART_SHIFT;
297 	}
298 	return default_llseek(file, offset, whence);
299 }
300 
301 static int orangefs_dir_iterate(struct file *file,
302     struct dir_context *ctx)
303 {
304 	struct orangefs_dir *od = file->private_data;
305 	struct inode *inode = file_inode(file);
306 	int r;
307 
308 	if (od->error)
309 		return od->error;
310 
311 	if (ctx->pos == 0) {
312 		if (!dir_emit_dot(file, ctx))
313 			return 0;
314 		ctx->pos++;
315 	}
316 	if (ctx->pos == 1) {
317 		if (!dir_emit_dotdot(file, ctx))
318 			return 0;
319 		ctx->pos = 1 << PART_SHIFT;
320 	}
321 
322 	/*
323 	 * The seek position is in the first synthesized part but is not
324 	 * valid.
325 	 */
326 	if ((ctx->pos & PART_MASK) == 0)
327 		return -EIO;
328 
329 	r = 0;
330 
331 	/*
332 	 * Must read more if the user has sought past what has been read
333 	 * so far.  Stop a user who has sought past the end.
334 	 */
335 	while (od->token != ORANGEFS_ITERATE_END &&
336 	    ctx->pos > od->end) {
337 		r = orangefs_dir_more(od, inode);
338 		if (r)
339 			return r;
340 	}
341 	if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
342 		return -EIO;
343 
344 	/* Then try to fill if there's any left in the buffer. */
345 	if (ctx->pos < od->end) {
346 		r = orangefs_dir_fill(od, ctx);
347 		if (r)
348 			return r;
349 	}
350 
351 	/* Finally get some more and try to fill. */
352 	if (od->token != ORANGEFS_ITERATE_END) {
353 		r = orangefs_dir_more(od, inode);
354 		if (r)
355 			return r;
356 		r = orangefs_dir_fill(od, ctx);
357 	}
358 
359 	return r;
360 }
361 
362 static int orangefs_dir_open(struct inode *inode, struct file *file)
363 {
364 	struct orangefs_dir *od;
365 	file->private_data = kmalloc(sizeof(struct orangefs_dir),
366 	    GFP_KERNEL);
367 	if (!file->private_data)
368 		return -ENOMEM;
369 	od = file->private_data;
370 	od->token = ORANGEFS_ITERATE_START;
371 	od->part = NULL;
372 	od->end = 1 << PART_SHIFT;
373 	od->error = 0;
374 	return 0;
375 }
376 
377 static int orangefs_dir_release(struct inode *inode, struct file *file)
378 {
379 	struct orangefs_dir *od = file->private_data;
380 	struct orangefs_dir_part *part = od->part;
381 	while (part) {
382 		struct orangefs_dir_part *next = part->next;
383 		vfree(part);
384 		part = next;
385 	}
386 	kfree(od);
387 	return 0;
388 }
389 
390 const struct file_operations orangefs_dir_operations = {
391 	.llseek = orangefs_dir_llseek,
392 	.read = generic_read_dir,
393 	.iterate_shared = orangefs_dir_iterate,
394 	.open = orangefs_dir_open,
395 	.release = orangefs_dir_release
396 };
397