xref: /linux/fs/orangefs/dir.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * (C) 2001 Clemson University and The University of Chicago
3  *
4  * See COPYING in top-level directory.
5  */
6 
7 #include "protocol.h"
8 #include "orangefs-kernel.h"
9 #include "orangefs-bufmap.h"
10 
11 /*
12  * decode routine used by kmod to deal with the blob sent from
13  * userspace for readdirs. The blob contains zero or more of these
14  * sub-blobs:
15  *   __u32 - represents length of the character string that follows.
16  *   string - between 1 and ORANGEFS_NAME_MAX bytes long.
17  *   padding - (if needed) to cause the __u32 plus the string to be
18  *             eight byte aligned.
19  *   khandle - sizeof(khandle) bytes.
20  */
21 static long decode_dirents(char *ptr, size_t size,
22                            struct orangefs_readdir_response_s *readdir)
23 {
24 	int i;
25 	struct orangefs_readdir_response_s *rd =
26 		(struct orangefs_readdir_response_s *) ptr;
27 	char *buf = ptr;
28 	int khandle_size = sizeof(struct orangefs_khandle);
29 	size_t offset = offsetof(struct orangefs_readdir_response_s,
30 				dirent_array);
31 	/* 8 reflects eight byte alignment */
32 	int smallest_blob = khandle_size + 8;
33 	__u32 len;
34 	int aligned_len;
35 	int sizeof_u32 = sizeof(__u32);
36 	long ret;
37 
38 	gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);
39 
40 	/* size is = offset on empty dirs, > offset on non-empty dirs... */
41 	if (size < offset) {
42 		gossip_err("%s: size:%zu: offset:%zu:\n",
43 			   __func__,
44 			   size,
45 			   offset);
46 		ret = -EINVAL;
47 		goto out;
48 	}
49 
50         if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
51 		gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
52 			   __func__,
53 			   size,
54 			   readdir->orangefs_dirent_outcount);
55 		ret = -EINVAL;
56 		goto out;
57 	}
58 
59 	readdir->token = rd->token;
60 	readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
61 	readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
62 					sizeof(*readdir->dirent_array),
63 					GFP_KERNEL);
64 	if (readdir->dirent_array == NULL) {
65 		gossip_err("%s: kcalloc failed.\n", __func__);
66 		ret = -ENOMEM;
67 		goto out;
68 	}
69 
70 	buf += offset;
71 	size -= offset;
72 
73 	for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
74 		if (size < smallest_blob) {
75 			gossip_err("%s: size:%zu: smallest_blob:%d:\n",
76 				   __func__,
77 				   size,
78 				   smallest_blob);
79 			ret = -EINVAL;
80 			goto free;
81 		}
82 
83 		len = *(__u32 *)buf;
84 		if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
85 			gossip_err("%s: len:%d:\n", __func__, len);
86 			ret = -EINVAL;
87 			goto free;
88 		}
89 
90 		gossip_debug(GOSSIP_DIR_DEBUG,
91 			     "%s: size:%zu: len:%d:\n",
92 			     __func__,
93 			     size,
94 			     len);
95 
96 		readdir->dirent_array[i].d_name = buf + sizeof_u32;
97 		readdir->dirent_array[i].d_length = len;
98 
99 		/*
100 		 * Calculate "aligned" length of this string and its
101 		 * associated __u32 descriptor.
102 		 */
103 		aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
104 		gossip_debug(GOSSIP_DIR_DEBUG,
105 			     "%s: aligned_len:%d:\n",
106 			     __func__,
107 			     aligned_len);
108 
109 		/*
110 		 * The end of the blob should coincide with the end
111 		 * of the last sub-blob.
112 		 */
113 		if (size < aligned_len + khandle_size) {
114 			gossip_err("%s: ran off the end of the blob.\n",
115 				   __func__);
116 			ret = -EINVAL;
117 			goto free;
118 		}
119 		size -= aligned_len + khandle_size;
120 
121 		buf += aligned_len;
122 
123 		readdir->dirent_array[i].khandle =
124 			*(struct orangefs_khandle *) buf;
125 		buf += khandle_size;
126 	}
127 	ret = buf - ptr;
128 	gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
129 	goto out;
130 
131 free:
132 	kfree(readdir->dirent_array);
133 	readdir->dirent_array = NULL;
134 
135 out:
136 	return ret;
137 }
138 
139 /*
140  * Read directory entries from an instance of an open directory.
141  */
142 static int orangefs_readdir(struct file *file, struct dir_context *ctx)
143 {
144 	int ret = 0;
145 	int buffer_index;
146 	/*
147 	 * ptoken supports Orangefs' distributed directory logic, added
148 	 * in 2.9.2.
149 	 */
150 	__u64 *ptoken = file->private_data;
151 	__u64 pos = 0;
152 	ino_t ino = 0;
153 	struct dentry *dentry = file->f_path.dentry;
154 	struct orangefs_kernel_op_s *new_op = NULL;
155 	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
156 	struct orangefs_readdir_response_s readdir_response;
157 	void *dents_buf;
158 	int i = 0;
159 	int len = 0;
160 	ino_t current_ino = 0;
161 	char *current_entry = NULL;
162 	long bytes_decoded;
163 
164 	gossip_debug(GOSSIP_DIR_DEBUG,
165 		     "%s: ctx->pos:%lld, ptoken = %llu\n",
166 		     __func__,
167 		     lld(ctx->pos),
168 		     llu(*ptoken));
169 
170 	pos = (__u64) ctx->pos;
171 
172 	/* are we done? */
173 	if (pos == ORANGEFS_READDIR_END) {
174 		gossip_debug(GOSSIP_DIR_DEBUG,
175 			     "Skipping to termination path\n");
176 		return 0;
177 	}
178 
179 	gossip_debug(GOSSIP_DIR_DEBUG,
180 		     "orangefs_readdir called on %s (pos=%llu)\n",
181 		     dentry->d_name.name, llu(pos));
182 
183 	memset(&readdir_response, 0, sizeof(readdir_response));
184 
185 	new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
186 	if (!new_op)
187 		return -ENOMEM;
188 
189 	/*
190 	 * Only the indices are shared. No memory is actually shared, but the
191 	 * mechanism is used.
192 	 */
193 	new_op->uses_shared_memory = 1;
194 	new_op->upcall.req.readdir.refn = orangefs_inode->refn;
195 	new_op->upcall.req.readdir.max_dirent_count =
196 	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
197 
198 	gossip_debug(GOSSIP_DIR_DEBUG,
199 		     "%s: upcall.req.readdir.refn.khandle: %pU\n",
200 		     __func__,
201 		     &new_op->upcall.req.readdir.refn.khandle);
202 
203 	new_op->upcall.req.readdir.token = *ptoken;
204 
205 get_new_buffer_index:
206 	buffer_index = orangefs_readdir_index_get();
207 	if (buffer_index < 0) {
208 		ret = buffer_index;
209 		gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
210 			    ret);
211 		goto out_free_op;
212 	}
213 	new_op->upcall.req.readdir.buf_index = buffer_index;
214 
215 	ret = service_operation(new_op,
216 				"orangefs_readdir",
217 				get_interruptible_flag(dentry->d_inode));
218 
219 	gossip_debug(GOSSIP_DIR_DEBUG,
220 		     "Readdir downcall status is %d.  ret:%d\n",
221 		     new_op->downcall.status,
222 		     ret);
223 
224 	orangefs_readdir_index_put(buffer_index);
225 
226 	if (ret == -EAGAIN && op_state_purged(new_op)) {
227 		/* Client-core indices are invalid after it restarted. */
228 		gossip_debug(GOSSIP_DIR_DEBUG,
229 			"%s: Getting new buffer_index for retry of readdir..\n",
230 			 __func__);
231 		goto get_new_buffer_index;
232 	}
233 
234 	if (ret == -EIO && op_state_purged(new_op)) {
235 		gossip_err("%s: Client is down. Aborting readdir call.\n",
236 			__func__);
237 		goto out_free_op;
238 	}
239 
240 	if (ret < 0 || new_op->downcall.status != 0) {
241 		gossip_debug(GOSSIP_DIR_DEBUG,
242 			     "Readdir request failed.  Status:%d\n",
243 			     new_op->downcall.status);
244 		if (ret >= 0)
245 			ret = new_op->downcall.status;
246 		goto out_free_op;
247 	}
248 
249 	dents_buf = new_op->downcall.trailer_buf;
250 	if (dents_buf == NULL) {
251 		gossip_err("Invalid NULL buffer in readdir response\n");
252 		ret = -ENOMEM;
253 		goto out_free_op;
254 	}
255 
256 	bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
257 					&readdir_response);
258 	if (bytes_decoded < 0) {
259 		ret = bytes_decoded;
260 		gossip_err("Could not decode readdir from buffer %d\n", ret);
261 		goto out_vfree;
262 	}
263 
264 	if (bytes_decoded != new_op->downcall.trailer_size) {
265 		gossip_err("orangefs_readdir: # bytes decoded (%ld) "
266 			   "!= trailer size (%ld)\n",
267 			   bytes_decoded,
268 			   (long)new_op->downcall.trailer_size);
269 		ret = -EINVAL;
270 		goto out_destroy_handle;
271 	}
272 
273 	/*
274 	 *  orangefs doesn't actually store dot and dot-dot, but
275 	 *  we need to have them represented.
276 	 */
277 	if (pos == 0) {
278 		ino = get_ino_from_khandle(dentry->d_inode);
279 		gossip_debug(GOSSIP_DIR_DEBUG,
280 			     "%s: calling dir_emit of \".\" with pos = %llu\n",
281 			     __func__,
282 			     llu(pos));
283 		ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
284 		pos += 1;
285 	}
286 
287 	if (pos == 1) {
288 		ino = get_parent_ino_from_dentry(dentry);
289 		gossip_debug(GOSSIP_DIR_DEBUG,
290 			     "%s: calling dir_emit of \"..\" with pos = %llu\n",
291 			     __func__,
292 			     llu(pos));
293 		ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
294 		pos += 1;
295 	}
296 
297 	/*
298 	 * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
299 	 * to prevent "finding" dot and dot-dot on any iteration
300 	 * other than the first.
301 	 */
302 	if (ctx->pos == ORANGEFS_ITERATE_NEXT)
303 		ctx->pos = 0;
304 
305 	gossip_debug(GOSSIP_DIR_DEBUG,
306 		     "%s: dirent_outcount:%d:\n",
307 		     __func__,
308 		     readdir_response.orangefs_dirent_outcount);
309 	for (i = ctx->pos;
310 	     i < readdir_response.orangefs_dirent_outcount;
311 	     i++) {
312 		len = readdir_response.dirent_array[i].d_length;
313 		current_entry = readdir_response.dirent_array[i].d_name;
314 		current_ino = orangefs_khandle_to_ino(
315 			&readdir_response.dirent_array[i].khandle);
316 
317 		gossip_debug(GOSSIP_DIR_DEBUG,
318 			     "calling dir_emit for %s with len %d"
319 			     ", ctx->pos %ld\n",
320 			     current_entry,
321 			     len,
322 			     (unsigned long)ctx->pos);
323 		/*
324 		 * type is unknown. We don't return object type
325 		 * in the dirent_array. This leaves getdents
326 		 * clueless about type.
327 		 */
328 		ret =
329 		    dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
330 		if (!ret)
331 			break;
332 		ctx->pos++;
333 		gossip_debug(GOSSIP_DIR_DEBUG,
334 			      "%s: ctx->pos:%lld\n",
335 			      __func__,
336 			      lld(ctx->pos));
337 
338 	}
339 
340 	/*
341 	 * we ran all the way through the last batch, set up for
342 	 * getting another batch...
343 	 */
344 	if (ret) {
345 		*ptoken = readdir_response.token;
346 		ctx->pos = ORANGEFS_ITERATE_NEXT;
347 	}
348 
349 	/*
350 	 * Did we hit the end of the directory?
351 	 */
352 	if (readdir_response.token == ORANGEFS_READDIR_END) {
353 		gossip_debug(GOSSIP_DIR_DEBUG,
354 		"End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
355 		ctx->pos = ORANGEFS_READDIR_END;
356 	}
357 
358 out_destroy_handle:
359 	/* kfree(NULL) is safe */
360 	kfree(readdir_response.dirent_array);
361 out_vfree:
362 	gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
363 	vfree(dents_buf);
364 out_free_op:
365 	op_release(new_op);
366 	gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
367 	return ret;
368 }
369 
370 static int orangefs_dir_open(struct inode *inode, struct file *file)
371 {
372 	__u64 *ptoken;
373 
374 	file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
375 	if (!file->private_data)
376 		return -ENOMEM;
377 
378 	ptoken = file->private_data;
379 	*ptoken = ORANGEFS_READDIR_START;
380 	return 0;
381 }
382 
383 static int orangefs_dir_release(struct inode *inode, struct file *file)
384 {
385 	orangefs_flush_inode(inode);
386 	kfree(file->private_data);
387 	return 0;
388 }
389 
390 /** ORANGEFS implementation of VFS directory operations */
391 const struct file_operations orangefs_dir_operations = {
392 	.read = generic_read_dir,
393 	.iterate = orangefs_readdir,
394 	.open = orangefs_dir_open,
395 	.release = orangefs_dir_release,
396 };
397