xref: /linux/fs/orangefs/dir.c (revision e58e871becec2d3b04ed91c0c16fe8deac9c9dfa)
1 /*
2  * Copyright 2017 Omnibond Systems, L.L.C.
3  */
4 
5 #include "protocol.h"
6 #include "orangefs-kernel.h"
7 #include "orangefs-bufmap.h"
8 
/*
 * Header of one "part" of directory data.  parse_readdir() writes this
 * header over the start of the trailer buffer received for a readdir
 * reply, so the header and the encoded entries share one allocation.
 */
struct orangefs_dir_part {
	/* Next part in the list, or NULL for the last part. */
	struct orangefs_dir_part *next;
	/* Trailer size minus sizeof(struct orangefs_readdir_response_s). */
	size_t len;
};
13 
/* Per-open directory state, kept in file->private_data. */
struct orangefs_dir {
	/*
	 * Token sent with the next readdir upcall.  Starts at
	 * ORANGEFS_ITERATE_START and is replaced by the token from each reply.
	 */
	__u64 token;
	/* Head of the list of received parts, or NULL if none yet. */
	struct orangefs_dir_part *part;
	/* Position just past the last received part (part count + 1, shifted). */
	loff_t end;
	/* Last recorded error; while non-zero, orangefs_dir_iterate returns it. */
	int error;
};
20 
/*
 * Layout of the directory position: the bits at and above PART_SHIFT
 * select the part, the bits below it are the byte offset inside the part.
 */
#define PART_SHIFT (24)
/* Number of distinct offsets inside one part. */
#define PART_SIZE (1 << PART_SHIFT)
/* Mask that keeps the part number and clears the offset. */
#define PART_MASK (~(PART_SIZE - 1))
24 
25 /*
26  * There can be up to 512 directory entries.  Each entry is encoded as
27  * follows:
28  * 4 bytes: string size (n)
29  * n bytes: string
30  * 1 byte: trailing zero
31  * padding to 8 bytes
32  * 16 bytes: khandle
33  * padding to 8 bytes
34  *
35  * The trailer_buf starts with a struct orangefs_readdir_response_s
36  * which must be skipped to get to the directory data.
37  *
38  * The data which is received from the userspace daemon is termed a
39  * part and is stored in a linked list in case more than one part is
40  * needed for a large directory.
41  *
42  * The position pointer (ctx->pos) encodes the part and offset on which
43  * to begin reading at.  Bits above PART_SHIFT encode the part and bits
44  * below PART_SHIFT encode the offset.  Parts are stored in a linked
45  * list which grows as data is received from the server.  The overhead
46  * associated with managing the list is presumed to be small compared to
47  * the overhead of communicating with the server.
48  *
49  * As data is received from the server, it is placed at the end of the
50  * part list.  Data is parsed from the current position as it is needed.
51  * When data is determined to be corrupt, it is either because the
52  * userspace component has sent back corrupt data or because the file
53  * pointer has been moved to an invalid location.  Since the two cannot
54  * be differentiated, return EIO.
55  *
56  * Part zero is synthesized to contain `.' and `..'.  Part one is the
57  * first part of the part list.
58  */
59 
60 static int do_readdir(struct orangefs_inode_s *oi,
61     struct orangefs_dir *od, struct dentry *dentry,
62     struct orangefs_kernel_op_s *op)
63 {
64 	struct orangefs_readdir_response_s *resp;
65 	int bufi, r;
66 
67 	/*
68 	 * Despite the badly named field, readdir does not use shared
69 	 * memory.  However, there are a limited number of readdir
70 	 * slots, which must be allocated here.  This flag simply tells
71 	 * the op scheduler to return the op here for retry.
72 	 */
73 	op->uses_shared_memory = 1;
74 	op->upcall.req.readdir.refn = oi->refn;
75 	op->upcall.req.readdir.token = od->token;
76 	op->upcall.req.readdir.max_dirent_count =
77 	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
78 
79 again:
80 	bufi = orangefs_readdir_index_get();
81 	if (bufi < 0) {
82 		od->error = bufi;
83 		return bufi;
84 	}
85 
86 	op->upcall.req.readdir.buf_index = bufi;
87 
88 	r = service_operation(op, "orangefs_readdir",
89 	    get_interruptible_flag(dentry->d_inode));
90 
91 	orangefs_readdir_index_put(bufi);
92 
93 	if (op_state_purged(op)) {
94 		if (r == -EAGAIN) {
95 			vfree(op->downcall.trailer_buf);
96 			goto again;
97 		} else if (r == -EIO) {
98 			vfree(op->downcall.trailer_buf);
99 			od->error = r;
100 			return r;
101 		}
102 	}
103 
104 	if (r < 0) {
105 		vfree(op->downcall.trailer_buf);
106 		od->error = r;
107 		return r;
108 	} else if (op->downcall.status) {
109 		vfree(op->downcall.trailer_buf);
110 		od->error = op->downcall.status;
111 		return op->downcall.status;
112 	}
113 
114 	/*
115 	 * The maximum size is size per entry times the 512 entries plus
116 	 * the header.  This is well under the limit.
117 	 */
118 	if (op->downcall.trailer_size > PART_SIZE) {
119 		vfree(op->downcall.trailer_buf);
120 		od->error = -EIO;
121 		return -EIO;
122 	}
123 
124 	resp = (struct orangefs_readdir_response_s *)
125 	    op->downcall.trailer_buf;
126 	od->token = resp->token;
127 	return 0;
128 }
129 
130 static int parse_readdir(struct orangefs_dir *od,
131     struct orangefs_kernel_op_s *op)
132 {
133 	struct orangefs_dir_part *part, *new;
134 	size_t count;
135 
136 	count = 1;
137 	part = od->part;
138 	while (part) {
139 		count++;
140 		if (part->next)
141 			part = part->next;
142 		else
143 			break;
144 	}
145 
146 	new = (void *)op->downcall.trailer_buf;
147 	new->next = NULL;
148 	new->len = op->downcall.trailer_size -
149 	    sizeof(struct orangefs_readdir_response_s);
150 	if (!od->part)
151 		od->part = new;
152 	else
153 		part->next = new;
154 	count++;
155 	od->end = count << PART_SHIFT;
156 
157 	return 0;
158 }
159 
160 static int orangefs_dir_more(struct orangefs_inode_s *oi,
161     struct orangefs_dir *od, struct dentry *dentry)
162 {
163 	struct orangefs_kernel_op_s *op;
164 	int r;
165 
166 	op = op_alloc(ORANGEFS_VFS_OP_READDIR);
167 	if (!op) {
168 		od->error = -ENOMEM;
169 		return -ENOMEM;
170 	}
171 	r = do_readdir(oi, od, dentry, op);
172 	if (r) {
173 		od->error = r;
174 		goto out;
175 	}
176 	r = parse_readdir(od, op);
177 	if (r) {
178 		od->error = r;
179 		goto out;
180 	}
181 
182 	od->error = 0;
183 out:
184 	op_release(op);
185 	return od->error;
186 }
187 
188 static int fill_from_part(struct orangefs_dir_part *part,
189     struct dir_context *ctx)
190 {
191 	const int offset = sizeof(struct orangefs_readdir_response_s);
192 	struct orangefs_khandle *khandle;
193 	__u32 *len, padlen;
194 	loff_t i;
195 	char *s;
196 	i = ctx->pos & ~PART_MASK;
197 
198 	/* The file offset from userspace is too large. */
199 	if (i > part->len)
200 		return 1;
201 
202 	/*
203 	 * If the seek pointer is positioned just before an entry it
204 	 * should find the next entry.
205 	 */
206 	if (i % 8)
207 		i = i + (8 - i%8)%8;
208 
209 	while (i < part->len) {
210 		if (part->len < i + sizeof *len)
211 			break;
212 		len = (void *)part + offset + i;
213 		/*
214 		 * len is the size of the string itself.  padlen is the
215 		 * total size of the encoded string.
216 		 */
217 		padlen = (sizeof *len + *len + 1) +
218 		    (8 - (sizeof *len + *len + 1)%8)%8;
219 		if (part->len < i + padlen + sizeof *khandle)
220 			goto next;
221 		s = (void *)part + offset + i + sizeof *len;
222 		if (s[*len] != 0)
223 			goto next;
224 		khandle = (void *)part + offset + i + padlen;
225 		if (!dir_emit(ctx, s, *len,
226 		    orangefs_khandle_to_ino(khandle),
227 		    DT_UNKNOWN))
228 			return 0;
229 		i += padlen + sizeof *khandle;
230 		i = i + (8 - i%8)%8;
231 		BUG_ON(i > part->len);
232 		ctx->pos = (ctx->pos & PART_MASK) | i;
233 		continue;
234 next:
235 		i += 8;
236 	}
237 	return 1;
238 }
239 
240 static int orangefs_dir_fill(struct orangefs_inode_s *oi,
241     struct orangefs_dir *od, struct dentry *dentry,
242     struct dir_context *ctx)
243 {
244 	struct orangefs_dir_part *part;
245 	size_t count;
246 
247 	count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
248 
249 	part = od->part;
250 	while (part->next && count) {
251 		count--;
252 		part = part->next;
253 	}
254 	/* This means the userspace file offset is invalid. */
255 	if (count) {
256 		od->error = -EIO;
257 		return -EIO;
258 	}
259 
260 	while (part && part->len) {
261 		int r;
262 		r = fill_from_part(part, ctx);
263 		if (r < 0) {
264 			od->error = r;
265 			return r;
266 		} else if (r == 0) {
267 			/* Userspace buffer is full. */
268 			break;
269 		} else {
270 			/*
271 			 * The part ran out of data.  Move to the next
272 			 * part. */
273 			ctx->pos = (ctx->pos & PART_MASK) +
274 			    (1 << PART_SHIFT);
275 			part = part->next;
276 		}
277 	}
278 	return 0;
279 }
280 
281 static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
282     int whence)
283 {
284 	struct orangefs_dir *od = file->private_data;
285 	/*
286 	 * Delete the stored data so userspace sees new directory
287 	 * entries.
288 	 */
289 	if (!whence && offset < od->end) {
290 		struct orangefs_dir_part *part = od->part;
291 		while (part) {
292 			struct orangefs_dir_part *next = part->next;
293 			vfree(part);
294 			part = next;
295 		}
296 		od->token = ORANGEFS_ITERATE_START;
297 		od->part = NULL;
298 		od->end = 1 << PART_SHIFT;
299 	}
300 	return default_llseek(file, offset, whence);
301 }
302 
303 static int orangefs_dir_iterate(struct file *file,
304     struct dir_context *ctx)
305 {
306 	struct orangefs_inode_s *oi;
307 	struct orangefs_dir *od;
308 	struct dentry *dentry;
309 	int r;
310 
311 	dentry = file->f_path.dentry;
312 	oi = ORANGEFS_I(dentry->d_inode);
313 	od = file->private_data;
314 
315 	if (od->error)
316 		return od->error;
317 
318 	if (ctx->pos == 0) {
319 		if (!dir_emit_dot(file, ctx))
320 			return 0;
321 		ctx->pos++;
322 	}
323 	if (ctx->pos == 1) {
324 		if (!dir_emit_dotdot(file, ctx))
325 			return 0;
326 		ctx->pos = 1 << PART_SHIFT;
327 	}
328 
329 	/*
330 	 * The seek position is in the first synthesized part but is not
331 	 * valid.
332 	 */
333 	if ((ctx->pos & PART_MASK) == 0)
334 		return -EIO;
335 
336 	r = 0;
337 
338 	/*
339 	 * Must read more if the user has sought past what has been read
340 	 * so far.  Stop a user who has sought past the end.
341 	 */
342 	while (od->token != ORANGEFS_ITERATE_END &&
343 	    ctx->pos > od->end) {
344 		r = orangefs_dir_more(oi, od, dentry);
345 		if (r)
346 			return r;
347 	}
348 	if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
349 		return -EIO;
350 
351 	/* Then try to fill if there's any left in the buffer. */
352 	if (ctx->pos < od->end) {
353 		r = orangefs_dir_fill(oi, od, dentry, ctx);
354 		if (r)
355 			return r;
356 	}
357 
358 	/* Finally get some more and try to fill. */
359 	if (od->token != ORANGEFS_ITERATE_END) {
360 		r = orangefs_dir_more(oi, od, dentry);
361 		if (r)
362 			return r;
363 		r = orangefs_dir_fill(oi, od, dentry, ctx);
364 	}
365 
366 	return r;
367 }
368 
369 static int orangefs_dir_open(struct inode *inode, struct file *file)
370 {
371 	struct orangefs_dir *od;
372 	file->private_data = kmalloc(sizeof(struct orangefs_dir),
373 	    GFP_KERNEL);
374 	if (!file->private_data)
375 		return -ENOMEM;
376 	od = file->private_data;
377 	od->token = ORANGEFS_ITERATE_START;
378 	od->part = NULL;
379 	od->end = 1 << PART_SHIFT;
380 	od->error = 0;
381 	return 0;
382 }
383 
384 static int orangefs_dir_release(struct inode *inode, struct file *file)
385 {
386 	struct orangefs_dir *od = file->private_data;
387 	struct orangefs_dir_part *part = od->part;
388 	orangefs_flush_inode(inode);
389 	while (part) {
390 		struct orangefs_dir_part *next = part->next;
391 		vfree(part);
392 		part = next;
393 	}
394 	kfree(od);
395 	return 0;
396 }
397 
/*
 * File operations for directories.  Iteration, seeking, open and release
 * are implemented in this file; .read is the generic directory read
 * handler.
 */
const struct file_operations orangefs_dir_operations = {
	.llseek = orangefs_dir_llseek,
	.read = generic_read_dir,
	.iterate = orangefs_dir_iterate,
	.open = orangefs_dir_open,
	.release = orangefs_dir_release
};
405