1 /* 2 * Copyright 2017 Omnibond Systems, L.L.C. 3 */ 4 5 #include "protocol.h" 6 #include "orangefs-kernel.h" 7 #include "orangefs-bufmap.h" 8 9 struct orangefs_dir_part { 10 struct orangefs_dir_part *next; 11 size_t len; 12 }; 13 14 struct orangefs_dir { 15 __u64 token; 16 struct orangefs_dir_part *part; 17 loff_t end; 18 int error; 19 }; 20 21 #define PART_SHIFT (24) 22 #define PART_SIZE (1<<24) 23 #define PART_MASK (~(PART_SIZE - 1)) 24 25 /* 26 * There can be up to 512 directory entries. Each entry is encoded as 27 * follows: 28 * 4 bytes: string size (n) 29 * n bytes: string 30 * 1 byte: trailing zero 31 * padding to 8 bytes 32 * 16 bytes: khandle 33 * padding to 8 bytes 34 * 35 * The trailer_buf starts with a struct orangefs_readdir_response_s 36 * which must be skipped to get to the directory data. 37 * 38 * The data which is received from the userspace daemon is termed a 39 * part and is stored in a linked list in case more than one part is 40 * needed for a large directory. 41 * 42 * The position pointer (ctx->pos) encodes the part and offset on which 43 * to begin reading at. Bits above PART_SHIFT encode the part and bits 44 * below PART_SHIFT encode the offset. Parts are stored in a linked 45 * list which grows as data is received from the server. The overhead 46 * associated with managing the list is presumed to be small compared to 47 * the overhead of communicating with the server. 48 * 49 * As data is received from the server, it is placed at the end of the 50 * part list. Data is parsed from the current position as it is needed. 51 * When data is determined to be corrupt, it is either because the 52 * userspace component has sent back corrupt data or because the file 53 * pointer has been moved to an invalid location. Since the two cannot 54 * be differentiated, return EIO. 55 * 56 * Part zero is synthesized to contains `.' and `..'. Part one is the 57 * first part of the part list. 58 */ 59 60 static int do_readdir(struct orangefs_inode_s *oi, 61 struct orangefs_dir *od, struct dentry *dentry, 62 struct orangefs_kernel_op_s *op) 63 { 64 struct orangefs_readdir_response_s *resp; 65 int bufi, r; 66 67 /* 68 * Despite the badly named field, readdir does not use shared 69 * memory. However, there are a limited number of readdir 70 * slots, which must be allocated here. This flag simply tells 71 * the op scheduler to return the op here for retry. 72 */ 73 op->uses_shared_memory = 1; 74 op->upcall.req.readdir.refn = oi->refn; 75 op->upcall.req.readdir.token = od->token; 76 op->upcall.req.readdir.max_dirent_count = 77 ORANGEFS_MAX_DIRENT_COUNT_READDIR; 78 79 again: 80 bufi = orangefs_readdir_index_get(); 81 if (bufi < 0) { 82 od->error = bufi; 83 return bufi; 84 } 85 86 op->upcall.req.readdir.buf_index = bufi; 87 88 r = service_operation(op, "orangefs_readdir", 89 get_interruptible_flag(dentry->d_inode)); 90 91 orangefs_readdir_index_put(bufi); 92 93 if (op_state_purged(op)) { 94 if (r == -EAGAIN) { 95 vfree(op->downcall.trailer_buf); 96 goto again; 97 } else if (r == -EIO) { 98 vfree(op->downcall.trailer_buf); 99 od->error = r; 100 return r; 101 } 102 } 103 104 if (r < 0) { 105 vfree(op->downcall.trailer_buf); 106 od->error = r; 107 return r; 108 } else if (op->downcall.status) { 109 vfree(op->downcall.trailer_buf); 110 od->error = op->downcall.status; 111 return op->downcall.status; 112 } 113 114 /* 115 * The maximum size is size per entry times the 512 entries plus 116 * the header. This is well under the limit. 117 */ 118 if (op->downcall.trailer_size > PART_SIZE) { 119 vfree(op->downcall.trailer_buf); 120 od->error = -EIO; 121 return -EIO; 122 } 123 124 resp = (struct orangefs_readdir_response_s *) 125 op->downcall.trailer_buf; 126 od->token = resp->token; 127 return 0; 128 } 129 130 static int parse_readdir(struct orangefs_dir *od, 131 struct orangefs_kernel_op_s *op) 132 { 133 struct orangefs_dir_part *part, *new; 134 size_t count; 135 136 count = 1; 137 part = od->part; 138 while (part) { 139 count++; 140 if (part->next) 141 part = part->next; 142 else 143 break; 144 } 145 146 new = (void *)op->downcall.trailer_buf; 147 new->next = NULL; 148 new->len = op->downcall.trailer_size - 149 sizeof(struct orangefs_readdir_response_s); 150 if (!od->part) 151 od->part = new; 152 else 153 part->next = new; 154 count++; 155 od->end = count << PART_SHIFT; 156 157 return 0; 158 } 159 160 static int orangefs_dir_more(struct orangefs_inode_s *oi, 161 struct orangefs_dir *od, struct dentry *dentry) 162 { 163 struct orangefs_kernel_op_s *op; 164 int r; 165 166 op = op_alloc(ORANGEFS_VFS_OP_READDIR); 167 if (!op) { 168 od->error = -ENOMEM; 169 return -ENOMEM; 170 } 171 r = do_readdir(oi, od, dentry, op); 172 if (r) { 173 od->error = r; 174 goto out; 175 } 176 r = parse_readdir(od, op); 177 if (r) { 178 od->error = r; 179 goto out; 180 } 181 182 od->error = 0; 183 out: 184 op_release(op); 185 return od->error; 186 } 187 188 static int fill_from_part(struct orangefs_dir_part *part, 189 struct dir_context *ctx) 190 { 191 const int offset = sizeof(struct orangefs_readdir_response_s); 192 struct orangefs_khandle *khandle; 193 __u32 *len, padlen; 194 loff_t i; 195 char *s; 196 i = ctx->pos & ~PART_MASK; 197 198 /* The file offset from userspace is too large. */ 199 if (i > part->len) 200 return 1; 201 202 /* 203 * If the seek pointer is positioned just before an entry it 204 * should find the next entry. 205 */ 206 if (i % 8) 207 i = i + (8 - i%8)%8; 208 209 while (i < part->len) { 210 if (part->len < i + sizeof *len) 211 break; 212 len = (void *)part + offset + i; 213 /* 214 * len is the size of the string itself. padlen is the 215 * total size of the encoded string. 216 */ 217 padlen = (sizeof *len + *len + 1) + 218 (8 - (sizeof *len + *len + 1)%8)%8; 219 if (part->len < i + padlen + sizeof *khandle) 220 goto next; 221 s = (void *)part + offset + i + sizeof *len; 222 if (s[*len] != 0) 223 goto next; 224 khandle = (void *)part + offset + i + padlen; 225 if (!dir_emit(ctx, s, *len, 226 orangefs_khandle_to_ino(khandle), 227 DT_UNKNOWN)) 228 return 0; 229 i += padlen + sizeof *khandle; 230 i = i + (8 - i%8)%8; 231 BUG_ON(i > part->len); 232 ctx->pos = (ctx->pos & PART_MASK) | i; 233 continue; 234 next: 235 i += 8; 236 } 237 return 1; 238 } 239 240 static int orangefs_dir_fill(struct orangefs_inode_s *oi, 241 struct orangefs_dir *od, struct dentry *dentry, 242 struct dir_context *ctx) 243 { 244 struct orangefs_dir_part *part; 245 size_t count; 246 247 count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1; 248 249 part = od->part; 250 while (part->next && count) { 251 count--; 252 part = part->next; 253 } 254 /* This means the userspace file offset is invalid. */ 255 if (count) { 256 od->error = -EIO; 257 return -EIO; 258 } 259 260 while (part && part->len) { 261 int r; 262 r = fill_from_part(part, ctx); 263 if (r < 0) { 264 od->error = r; 265 return r; 266 } else if (r == 0) { 267 /* Userspace buffer is full. */ 268 break; 269 } else { 270 /* 271 * The part ran out of data. Move to the next 272 * part. */ 273 ctx->pos = (ctx->pos & PART_MASK) + 274 (1 << PART_SHIFT); 275 part = part->next; 276 } 277 } 278 return 0; 279 } 280 281 static loff_t orangefs_dir_llseek(struct file *file, loff_t offset, 282 int whence) 283 { 284 struct orangefs_dir *od = file->private_data; 285 /* 286 * Delete the stored data so userspace sees new directory 287 * entries. 288 */ 289 if (!whence && offset < od->end) { 290 struct orangefs_dir_part *part = od->part; 291 while (part) { 292 struct orangefs_dir_part *next = part->next; 293 vfree(part); 294 part = next; 295 } 296 od->token = ORANGEFS_ITERATE_START; 297 od->part = NULL; 298 od->end = 1 << PART_SHIFT; 299 } 300 return default_llseek(file, offset, whence); 301 } 302 303 static int orangefs_dir_iterate(struct file *file, 304 struct dir_context *ctx) 305 { 306 struct orangefs_inode_s *oi; 307 struct orangefs_dir *od; 308 struct dentry *dentry; 309 int r; 310 311 dentry = file->f_path.dentry; 312 oi = ORANGEFS_I(dentry->d_inode); 313 od = file->private_data; 314 315 if (od->error) 316 return od->error; 317 318 if (ctx->pos == 0) { 319 if (!dir_emit_dot(file, ctx)) 320 return 0; 321 ctx->pos++; 322 } 323 if (ctx->pos == 1) { 324 if (!dir_emit_dotdot(file, ctx)) 325 return 0; 326 ctx->pos = 1 << PART_SHIFT; 327 } 328 329 /* 330 * The seek position is in the first synthesized part but is not 331 * valid. 332 */ 333 if ((ctx->pos & PART_MASK) == 0) 334 return -EIO; 335 336 r = 0; 337 338 /* 339 * Must read more if the user has sought past what has been read 340 * so far. Stop a user who has sought past the end. 341 */ 342 while (od->token != ORANGEFS_ITERATE_END && 343 ctx->pos > od->end) { 344 r = orangefs_dir_more(oi, od, dentry); 345 if (r) 346 return r; 347 } 348 if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end) 349 return -EIO; 350 351 /* Then try to fill if there's any left in the buffer. */ 352 if (ctx->pos < od->end) { 353 r = orangefs_dir_fill(oi, od, dentry, ctx); 354 if (r) 355 return r; 356 } 357 358 /* Finally get some more and try to fill. */ 359 if (od->token != ORANGEFS_ITERATE_END) { 360 r = orangefs_dir_more(oi, od, dentry); 361 if (r) 362 return r; 363 r = orangefs_dir_fill(oi, od, dentry, ctx); 364 } 365 366 return r; 367 } 368 369 static int orangefs_dir_open(struct inode *inode, struct file *file) 370 { 371 struct orangefs_dir *od; 372 file->private_data = kmalloc(sizeof(struct orangefs_dir), 373 GFP_KERNEL); 374 if (!file->private_data) 375 return -ENOMEM; 376 od = file->private_data; 377 od->token = ORANGEFS_ITERATE_START; 378 od->part = NULL; 379 od->end = 1 << PART_SHIFT; 380 od->error = 0; 381 return 0; 382 } 383 384 static int orangefs_dir_release(struct inode *inode, struct file *file) 385 { 386 struct orangefs_dir *od = file->private_data; 387 struct orangefs_dir_part *part = od->part; 388 orangefs_flush_inode(inode); 389 while (part) { 390 struct orangefs_dir_part *next = part->next; 391 vfree(part); 392 part = next; 393 } 394 kfree(od); 395 return 0; 396 } 397 398 const struct file_operations orangefs_dir_operations = { 399 .llseek = orangefs_dir_llseek, 400 .read = generic_read_dir, 401 .iterate = orangefs_dir_iterate, 402 .open = orangefs_dir_open, 403 .release = orangefs_dir_release 404 }; 405