xref: /linux/fs/orangefs/dir.c (revision a59511d1daa6406eede58a79f99250ffcd9a3566)
1 /*
2  * (C) 2001 Clemson University and The University of Chicago
3  *
4  * See COPYING in top-level directory.
5  */
6 
7 #include "protocol.h"
8 #include "orangefs-kernel.h"
9 #include "orangefs-bufmap.h"
10 
11 /*
12  * decode routine used by kmod to deal with the blob sent from
13  * userspace for readdirs. The blob contains zero or more of these
14  * sub-blobs:
15  *   __u32 - represents length of the character string that follows.
16  *   string - between 1 and ORANGEFS_NAME_MAX bytes long.
17  *   padding - (if needed) to cause the __u32 plus the string to be
18  *             eight byte aligned.
19  *   khandle - sizeof(khandle) bytes.
20  */
21 static long decode_dirents(char *ptr, size_t size,
22                            struct orangefs_readdir_response_s *readdir)
23 {
24 	int i;
25 	struct orangefs_readdir_response_s *rd =
26 		(struct orangefs_readdir_response_s *) ptr;
27 	char *buf = ptr;
28 	int khandle_size = sizeof(struct orangefs_khandle);
29 	size_t offset = offsetof(struct orangefs_readdir_response_s,
30 				dirent_array);
31 	/* 8 reflects eight byte alignment */
32 	int smallest_blob = khandle_size + 8;
33 	__u32 len;
34 	int aligned_len;
35 	int sizeof_u32 = sizeof(__u32);
36 	long ret;
37 
38 	gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);
39 
40 	/* size is = offset on empty dirs, > offset on non-empty dirs... */
41 	if (size < offset) {
42 		gossip_err("%s: size:%zu: offset:%zu:\n",
43 			   __func__,
44 			   size,
45 			   offset);
46 		ret = -EINVAL;
47 		goto out;
48 	}
49 
50         if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
51 		gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
52 			   __func__,
53 			   size,
54 			   readdir->orangefs_dirent_outcount);
55 		ret = -EINVAL;
56 		goto out;
57 	}
58 
59 	readdir->token = rd->token;
60 	readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
61 	readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
62 					sizeof(*readdir->dirent_array),
63 					GFP_KERNEL);
64 	if (readdir->dirent_array == NULL) {
65 		gossip_err("%s: kcalloc failed.\n", __func__);
66 		ret = -ENOMEM;
67 		goto out;
68 	}
69 
70 	buf += offset;
71 	size -= offset;
72 
73 	for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
74 		if (size < smallest_blob) {
75 			gossip_err("%s: size:%zu: smallest_blob:%d:\n",
76 				   __func__,
77 				   size,
78 				   smallest_blob);
79 			ret = -EINVAL;
80 			goto free;
81 		}
82 
83 		len = *(__u32 *)buf;
84 		if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
85 			gossip_err("%s: len:%d:\n", __func__, len);
86 			ret = -EINVAL;
87 			goto free;
88 		}
89 
90 		gossip_debug(GOSSIP_DIR_DEBUG,
91 			     "%s: size:%zu: len:%d:\n",
92 			     __func__,
93 			     size,
94 			     len);
95 
96 		readdir->dirent_array[i].d_name = buf + sizeof_u32;
97 		readdir->dirent_array[i].d_length = len;
98 
99 		/*
100 		 * Calculate "aligned" length of this string and its
101 		 * associated __u32 descriptor.
102 		 */
103 		aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
104 		gossip_debug(GOSSIP_DIR_DEBUG,
105 			     "%s: aligned_len:%d:\n",
106 			     __func__,
107 			     aligned_len);
108 
109 		/*
110 		 * The end of the blob should coincide with the end
111 		 * of the last sub-blob.
112 		 */
113 		if (size < aligned_len + khandle_size) {
114 			gossip_err("%s: ran off the end of the blob.\n",
115 				   __func__);
116 			ret = -EINVAL;
117 			goto free;
118 		}
119 		size -= aligned_len + khandle_size;
120 
121 		buf += aligned_len;
122 
123 		readdir->dirent_array[i].khandle =
124 			*(struct orangefs_khandle *) buf;
125 		buf += khandle_size;
126 	}
127 	ret = buf - ptr;
128 	gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
129 	goto out;
130 
131 free:
132 	kfree(readdir->dirent_array);
133 	readdir->dirent_array = NULL;
134 
135 out:
136 	return ret;
137 }
138 
139 /*
140  * Read directory entries from an instance of an open directory.
141  */
142 static int orangefs_readdir(struct file *file, struct dir_context *ctx)
143 {
144 	int ret = 0;
145 	int buffer_index;
146 	/*
147 	 * ptoken supports Orangefs' distributed directory logic, added
148 	 * in 2.9.2.
149 	 */
150 	__u64 *ptoken = file->private_data;
151 	__u64 pos = 0;
152 	ino_t ino = 0;
153 	struct dentry *dentry = file->f_path.dentry;
154 	struct orangefs_kernel_op_s *new_op = NULL;
155 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
156 	int buffer_full = 0;
157 	struct orangefs_readdir_response_s readdir_response;
158 	void *dents_buf;
159 	int i = 0;
160 	int len = 0;
161 	ino_t current_ino = 0;
162 	char *current_entry = NULL;
163 	long bytes_decoded;
164 
165 	gossip_debug(GOSSIP_DIR_DEBUG,
166 		     "%s: ctx->pos:%lld, ptoken = %llu\n",
167 		     __func__,
168 		     lld(ctx->pos),
169 		     llu(*ptoken));
170 
171 	pos = (__u64) ctx->pos;
172 
173 	/* are we done? */
174 	if (pos == ORANGEFS_READDIR_END) {
175 		gossip_debug(GOSSIP_DIR_DEBUG,
176 			     "Skipping to termination path\n");
177 		return 0;
178 	}
179 
180 	gossip_debug(GOSSIP_DIR_DEBUG,
181 		     "orangefs_readdir called on %s (pos=%llu)\n",
182 		     dentry->d_name.name, llu(pos));
183 
184 	memset(&readdir_response, 0, sizeof(readdir_response));
185 
186 	new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
187 	if (!new_op)
188 		return -ENOMEM;
189 
190 	/*
191 	 * Only the indices are shared. No memory is actually shared, but the
192 	 * mechanism is used.
193 	 */
194 	new_op->uses_shared_memory = 1;
195 	new_op->upcall.req.readdir.refn = orangefs_inode->refn;
196 	new_op->upcall.req.readdir.max_dirent_count =
197 	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
198 
199 	gossip_debug(GOSSIP_DIR_DEBUG,
200 		     "%s: upcall.req.readdir.refn.khandle: %pU\n",
201 		     __func__,
202 		     &new_op->upcall.req.readdir.refn.khandle);
203 
204 	new_op->upcall.req.readdir.token = *ptoken;
205 
206 get_new_buffer_index:
207 	buffer_index = orangefs_readdir_index_get();
208 	if (buffer_index < 0) {
209 		ret = buffer_index;
210 		gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
211 			    ret);
212 		goto out_free_op;
213 	}
214 	new_op->upcall.req.readdir.buf_index = buffer_index;
215 
216 	ret = service_operation(new_op,
217 				"orangefs_readdir",
218 				get_interruptible_flag(dentry->d_inode));
219 
220 	gossip_debug(GOSSIP_DIR_DEBUG,
221 		     "Readdir downcall status is %d.  ret:%d\n",
222 		     new_op->downcall.status,
223 		     ret);
224 
225 	orangefs_readdir_index_put(buffer_index);
226 
227 	if (ret == -EAGAIN && op_state_purged(new_op)) {
228 		/* Client-core indices are invalid after it restarted. */
229 		gossip_debug(GOSSIP_DIR_DEBUG,
230 			"%s: Getting new buffer_index for retry of readdir..\n",
231 			 __func__);
232 		goto get_new_buffer_index;
233 	}
234 
235 	if (ret == -EIO && op_state_purged(new_op)) {
236 		gossip_err("%s: Client is down. Aborting readdir call.\n",
237 			__func__);
238 		goto out_slot;
239 	}
240 
241 	if (ret < 0 || new_op->downcall.status != 0) {
242 		gossip_debug(GOSSIP_DIR_DEBUG,
243 			     "Readdir request failed.  Status:%d\n",
244 			     new_op->downcall.status);
245 		if (ret >= 0)
246 			ret = new_op->downcall.status;
247 		goto out_slot;
248 	}
249 
250 	dents_buf = new_op->downcall.trailer_buf;
251 	if (dents_buf == NULL) {
252 		gossip_err("Invalid NULL buffer in readdir response\n");
253 		ret = -ENOMEM;
254 		goto out_slot;
255 	}
256 
257 	bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
258 					&readdir_response);
259 	if (bytes_decoded < 0) {
260 		ret = bytes_decoded;
261 		gossip_err("Could not decode readdir from buffer %d\n", ret);
262 		goto out_vfree;
263 	}
264 
265 	if (bytes_decoded != new_op->downcall.trailer_size) {
266 		gossip_err("orangefs_readdir: # bytes decoded (%ld) "
267 			   "!= trailer size (%ld)\n",
268 			   bytes_decoded,
269 			   (long)new_op->downcall.trailer_size);
270 		ret = -EINVAL;
271 		goto out_destroy_handle;
272 	}
273 
274 	/*
275 	 *  orangefs doesn't actually store dot and dot-dot, but
276 	 *  we need to have them represented.
277 	 */
278 	if (pos == 0) {
279 		ino = get_ino_from_khandle(dentry->d_inode);
280 		gossip_debug(GOSSIP_DIR_DEBUG,
281 			     "%s: calling dir_emit of \".\" with pos = %llu\n",
282 			     __func__,
283 			     llu(pos));
284 		ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
285 		pos += 1;
286 	}
287 
288 	if (pos == 1) {
289 		ino = get_parent_ino_from_dentry(dentry);
290 		gossip_debug(GOSSIP_DIR_DEBUG,
291 			     "%s: calling dir_emit of \"..\" with pos = %llu\n",
292 			     __func__,
293 			     llu(pos));
294 		ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
295 		pos += 1;
296 	}
297 
298 	/*
299 	 * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
300 	 * to prevent "finding" dot and dot-dot on any iteration
301 	 * other than the first.
302 	 */
303 	if (ctx->pos == ORANGEFS_ITERATE_NEXT)
304 		ctx->pos = 0;
305 
306 	gossip_debug(GOSSIP_DIR_DEBUG,
307 		     "%s: dirent_outcount:%d:\n",
308 		     __func__,
309 		     readdir_response.orangefs_dirent_outcount);
310 	for (i = ctx->pos;
311 	     i < readdir_response.orangefs_dirent_outcount;
312 	     i++) {
313 		len = readdir_response.dirent_array[i].d_length;
314 		current_entry = readdir_response.dirent_array[i].d_name;
315 		current_ino = orangefs_khandle_to_ino(
316 			&readdir_response.dirent_array[i].khandle);
317 
318 		gossip_debug(GOSSIP_DIR_DEBUG,
319 			     "calling dir_emit for %s with len %d"
320 			     ", ctx->pos %ld\n",
321 			     current_entry,
322 			     len,
323 			     (unsigned long)ctx->pos);
324 		/*
325 		 * type is unknown. We don't return object type
326 		 * in the dirent_array. This leaves getdents
327 		 * clueless about type.
328 		 */
329 		ret =
330 		    dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
331 		if (!ret)
332 			break;
333 		ctx->pos++;
334 		gossip_debug(GOSSIP_DIR_DEBUG,
335 			      "%s: ctx->pos:%lld\n",
336 			      __func__,
337 			      lld(ctx->pos));
338 
339 	}
340 
341 	/*
342 	 * we ran all the way through the last batch, set up for
343 	 * getting another batch...
344 	 */
345 	if (ret) {
346 		*ptoken = readdir_response.token;
347 		ctx->pos = ORANGEFS_ITERATE_NEXT;
348 	}
349 
350 	/*
351 	 * Did we hit the end of the directory?
352 	 */
353 	if (readdir_response.token == ORANGEFS_READDIR_END &&
354 	    !buffer_full) {
355 		gossip_debug(GOSSIP_DIR_DEBUG,
356 		"End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
357 		ctx->pos = ORANGEFS_READDIR_END;
358 	}
359 
360 out_destroy_handle:
361 	/* kfree(NULL) is safe */
362 	kfree(readdir_response.dirent_array);
363 out_vfree:
364 	gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
365 	vfree(dents_buf);
366 out_slot:
367 	orangefs_readdir_index_put(buffer_index);
368 out_free_op:
369 	op_release(new_op);
370 	gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
371 	return ret;
372 }
373 
374 static int orangefs_dir_open(struct inode *inode, struct file *file)
375 {
376 	__u64 *ptoken;
377 
378 	file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
379 	if (!file->private_data)
380 		return -ENOMEM;
381 
382 	ptoken = file->private_data;
383 	*ptoken = ORANGEFS_READDIR_START;
384 	return 0;
385 }
386 
387 static int orangefs_dir_release(struct inode *inode, struct file *file)
388 {
389 	orangefs_flush_inode(inode);
390 	kfree(file->private_data);
391 	return 0;
392 }
393 
394 /** ORANGEFS implementation of VFS directory operations */
395 const struct file_operations orangefs_dir_operations = {
396 	.read = generic_read_dir,
397 	.iterate = orangefs_readdir,
398 	.open = orangefs_dir_open,
399 	.release = orangefs_dir_release,
400 };
401